YONI fungal ITS2: making phyloseq object, calculating diversities and annotating FUNGuild

STEP 1: making phyloseq object

library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library("readxl")
library(tidyverse)

1. import OTUs and taxa

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\YONI_ITS_analyses\\R_ITS_yoni\\Analyses_final\\RE_ANNOTATION_2024')

otu <- read.table("ITS_OTUs_mod.txt", sep="\t", header=FALSE)
tax <- read.table("ITS_OTUs.UNITEv10_sh_99.wang.taxonomy", sep="\t", header=FALSE)

More (4) observations (OTUS) in tax? check what is the deal. Get OTU names

dif <- setdiff(tax$V1, otu$V1) 
dif
## [1] "OTU57653" "OTU58305"

These OTUs are missing from OTU table but are present in tax table? Let’s remove them form tax table Also, lets tweek the table row names and columns

# modify otu table

colnames(otu)=otu[c(1),]
# erase the first row, as now it is doubled
otu=otu[-c(1),]

# make first column into rownames
rownames(otu) <- otu$`OTU ID`
otu <- otu[, -c(1)]
# let's make a copy of tax table
tax.orig <- tax

# change column names
colnames(tax)[1] <- "OTU"
colnames(tax)[2] <- "taxa"

# and modify the tax, where in the str_remove_all . means any single character
tax <- tax %>%
  mutate(taxa = str_remove_all(taxa, ".__")) %>%
  separate(taxa,
           into = c("kingdom", "phylum", "class", "order", "family", "genus", "species"),
           sep = ";")

Everything went well but I got a warning message, because there are an extra ; at the end of the line so the last column is not made. which is correct. Let’s check if we now have NAs, just to check everything is ok.

sum(is.na(tax$OTU))
## [1] 0
sum(is.na(tax$kingdom))
## [1] 0
sum(is.na(tax$phylum))
## [1] 0
sum(is.na(tax$class))
## [1] 0
sum(is.na(tax$order))
## [1] 0
sum(is.na(tax$family))
## [1] 0
sum(is.na(tax$genus))
## [1] 0
sum(is.na(tax$species))
## [1] 0

Let’s remove the parantheses and numbers

tax$kingdom <- sub("\\(.*", "", tax$kingdom)
tax$phylum <- sub("\\(.*", "", tax$phylum)
tax$class <- sub("\\(.*", "", tax$class)
tax$order <- sub("\\(.*", "", tax$order)
tax$family <- sub("\\(.*", "", tax$family)
tax$genus <- sub("\\(.*", "", tax$genus)
tax$species <- sub("\\(.*", "", tax$species)

Check unique values of the higher taxons

unique(tax$kingdom)
## [1] "Fungi"
unique(tax$phylum)
##  [1] "Ascomycota"            "Basidiomycota"         "Fungi_unclassified"   
##  [4] "Rozellomycota"         "Basidiobolomycota"     "Chytridiomycota"      
##  [7] "Mortierellomycota"     "unclassified"          "Glomeromycota"        
## [10] "Mucoromycota"          "Zoopagomycota"         "Monoblepharomycota"   
## [13] "Olpidiomycota"         "Kickxellomycota"       "Entorrhizomycota"     
## [16] "Aphelidiomycota"       "Neocallimastigomycota"

1.2. remove the extra OTUs in tax table

“OTU57653” “OTU58305”

tax <- subset(tax, OTU!="OTU57653")
tax <- subset(tax, OTU!="OTU58305")

2. import meta data

samples <- read.csv2('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\YONI_ITS_analyses\\R_ITS_yoni\\Analyses_final\\RE_ANNOTATION_2024\\sample_data_updated_root_biomass.csv')
# change some column names
colnames(samples)[which(names(samples) == "production_type")] <- "sample_type"

2.1. change otu table sample names

x <- otu

x <- t(x)
x <- as.data.frame(x)
x <- rownames_to_column(x)

y <- samples[, c(1,2)]
x <- left_join(y, x, by = c("ID"="rowname"))
x <- x[, -c(1)]
rownames(x) <- x$sampleID
x <- x[, -c(1)]
x <- t(x)
otu <- x
rm(x)
rm(y)
library(dplyr)
otu <- as.data.frame(otu)
otu <- otu %>% mutate_if(is.character, as.numeric)

3. make phyloseq object

rownames(tax) <- tax$OTU
tax <- tax[, -c(1)]
rownames(samples) <- samples$sampleID
samples <- samples[, -c(1)]
samples <- sample_data(samples)
otu <- as.matrix(otu)
otu <- otu_table(otu, taxa_are_rows = TRUE)
tax <- tax_table(as.matrix(tax))

ps <- phyloseq(otu, tax, samples)
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 70198 taxa and 142 samples ]
## sample_data() Sample Data:       [ 142 samples by 22 sample variables ]
## tax_table()   Taxonomy Table:    [ 70198 taxa by 7 taxonomic ranks ]

remove unclassified phyla and controls

Fungi_unclassified unclassified

ps <- subset_samples(ps, sampleID != "0ctrl-1")
ps <- subset_samples(ps, sampleID != "0ctrl-2")
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 70198 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 22 sample variables ]
## tax_table()   Taxonomy Table:    [ 70198 taxa by 7 taxonomic ranks ]
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps),
              MARGIN = ifelse(taxa_are_rows(ps), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps = prune_taxa((prev0 > 0), ps)
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 70198 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 22 sample variables ]
## tax_table()   Taxonomy Table:    [ 70198 taxa by 7 taxonomic ranks ]
rm(prev0)
rm(tax.orig)

#I will remove two: "Fungi_unclassified" and "unclassified"
ps <- subset_taxa(ps, phylum != "Fungi_unclassified" & phylum != "unclassified")
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 68186 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 22 sample variables ]
## tax_table()   Taxonomy Table:    [ 68186 taxa by 7 taxonomic ranks ]

4. save phyloseq with sng

ps_sng <- ps
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
save(ps_sng, file='ps_phyloseq_with_sng')

5. Remove sngs and save

ps <- filter_taxa(ps, function (x) {sum(x > 0) > 1}, prune=TRUE)

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
save(ps, file='ps_FINAL')

ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 22 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
summarize_phyloseq(ps)
## [[1]]
## [1] "1] Min. number of reads = 44"
## 
## [[2]]
## [1] "2] Max. number of reads = 410851"
## 
## [[3]]
## [1] "3] Total number of reads = 11539503"
## 
## [[4]]
## [1] "4] Average number of reads = 82425.0214285714"
## 
## [[5]]
## [1] "5] Median number of reads = 79653.5"
## 
## [[6]]
## [1] "7] Sparsity = 0.928178415470992"
## 
## [[7]]
## [1] "6] Any OTU sum to 1 or less? NO"
## 
## [[8]]
## [1] "8] Number of singletons = 0"
## 
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n        (i.e. exactly one read detected across all samples)0"
## 
## [[10]]
## [1] "10] Number of sample variables are: 22"
## 
## [[11]]
##  [1] "sampleID"            "plot"                "sampling_position"  
##  [4] "actual_sample_depth" "depth"               "depth_numerical"    
##  [7] "vegetation"          "sample_type"         "root_mgg"           
## [10] "pH_H2O"              "EC_uScm"             "C_g_per_kg"         
## [13] "N_gkg"               "TP_gkg"              "Alox_mmolkg"        
## [16] "Feox_mmolkg"         "oxides_mmolkg"       "PH2O_mgkg"          
## [19] "Porg_mgkg"           "DOC_mgkg"            "Pinorg_mgkg"        
## [22] "C_per_N"

6. Check low readcounts

readcount(ps)
##   CG9.1_0to10  CG9.1_10to20  CG9.1_20to30  CG9.1_30to40  CG9.1_40to70 
##         86263         86079        104971         70687            44 
##   CG9.2_0to10  CG9.2_10to20  CG9.2_20to30  CG9.2_30to40  CG9.2_40to80 
##         93325         99797         71502         33354         12765 
##   CG9.3_0to10  CG9.3_10to20  CG9.3_20to30  CG9.3_30to40  CG9.3_40to70 
##         80231         84314         95845        119875          8588 
##  CPO5.1_0to10 CPO5.1_10to20 CPO5.1_20to30 CPO5.1_30to40 CPO5.1_40to70 
##         98378         70183         84869        104719         18979 
##  CPO5.2_0to10 CPO5.2_10to20 CPO5.2_20to30 CPO5.2_30to40 CPO5.2_40to70 
##         71870         95165         58356         13410          7818 
##  CPO5.3_0to10 CPO5.3_10to20 CPO5.3_20to30 CPO5.3_30to40 CPO5.3_40to80 
##         77865         69685         69812        100730         29285 
##  CR14.1_0to10 CR14.1_10to20 CR14.1_20to30 CR14.1_30to40 CR14.1_40to80 
##         87835         83770         45199        191427         10410 
##  CR14.2_0to10 CR14.2_10to20 CR14.2_20to30 CR14.2_30to40 CR14.2_40to70 
##         79191         80650        168434        108506         35667 
##  CR14.3_0to10 CR14.3_10to20 CR14.3_20to30 CR14.3_30to40 CR14.3_40to60 
##         85581         66271         90307        206728         27639 
##      M1_0to10     M1_10to20     M1_20to30     M1_30to40     M1_40to60 
##         20487         79126         80116         65933          9920 
##      M2_0to10     M2_10to20     M2_20to30     M2_30to40     M2_40to60 
##        104802         92361         68349         48693         13643 
##      M3_0to10     M3_10to20     M3_20to30     M3_30to40     M3_40to60 
##         89502         88371         96903         49946         65688 
##   NG2A1_0to10  NG2A1_10to20  NG2A1_20to30  NG2A1_30to40  NG2A1_40to70 
##         63024        150089        215010        410851         21745 
##   NG2A2_0to10  NG2A2_10to20  NG2A2_20to30  NG2A2_30to40  NG2A2_40to70 
##        122936        138014        150337        232098         28984 
##   NG2A3_0to10  NG2A3_10to20  NG2A3_20to30  NG2A3_30to40  NG2A3_40to70 
##         70345        112821        151484         56781         48499 
##   NG2B1_0to10  NG2B1_10to20  NG2B1_20to30  NG2B1_30to40  NG2B1_40to70 
##         71098        133953        156486         56105         45557 
##   NG2B2_0to10  NG2B2_10to20  NG2B2_20to30  NG2B2_30to40  NG2B2_40to70 
##         65605        203928        161140         71344           811 
##   NG2B3_0to10  NG2B3_10to20  NG2B3_20to30  NG2B3_30to40  NG2B3_40to70 
##         91494        103358        122182           526         18249 
##   NG3.2_0to10  NG3.2_10to20  NG3.2_20to30  NG3.2_30to40  NG3.2_40to70 
##         30720        126078         74239        153319         28542 
##   NG3.3_0to10  NG3.3_10to20  NG3.3_20to30  NG3.3_30to40  NG3.3_40to60 
##         15342         69130         56845        119149         72310 
##  OG10.1_0to10 OG10.1_10to20 OG10.1_20to30 OG10.1_30to40 OG10.1_40to80 
##         65025         73499         63754         23026          9882 
##  OG10.2_0to10 OG10.2_10to20 OG10.2_20to30 OG10.2_30to40 OG10.2_40to70 
##        107655         77838         90829         97084         32336 
##  OG10.3_0to10 OG10.3_10to20 OG10.3_20to30 OG10.3_30to40 OG10.3_40to70 
##         75755         80589         99232         67396         27950 
##  OPO6.2_0to10 OPO6.2_10to20 OPO6.2_20to30 OPO6.2_30to40 OPO6.2_40to70 
##         98465         84444         63563        103414         86941 
##  OPO6.3_0to10 OPO6.3_10to20 OPO6.3_20to30 OPO6.3_30to40 OPO6.3_40to70 
##        103969         85383         66316        166823         61540 
##  OR13.1_0to10 OR13.1_10to20 OR13.1_20to30 OR13.1_30to40 OR13.1_40to80 
##         94567        107459        112756         92093         20227 
##  OR13.2_0to10 OR13.2_10to20 OR13.2_20to30 OR13.2_30to40 OR13.2_40to60 
##        113845         22672         29060         12432         15481 
##  OR13.3_0to10 OR13.3_10to20 OR13.3_20to30 OR13.3_30to40 OR13.3_40to60 
##        101629         99800         91229         97335        235463

7. IMPORTANT! Bug in the TAX table!!

I noticed that there is a mistake in the taxonomy, so that sometimes I have “unclassified” annotation at lets say genus level, although I have annotation at lets say family level

In these cases I rather have at the genus level the annotation “family_unclassified”

tax <- as.data.frame(tax_table(ps))

I have no unclassified at phylum level

change first at level:

class

tax[tax$class == "unclassified", "phylum"]
##   [1] "Chytridiomycota"    "Basidiomycota"      "Chytridiomycota"   
##   [4] "Monoblepharomycota" "Rozellomycota"      "Basidiomycota"     
##   [7] "Chytridiomycota"    "Rozellomycota"      "Chytridiomycota"   
##  [10] "Rozellomycota"      "Rozellomycota"      "Chytridiomycota"   
##  [13] "Rozellomycota"      "Rozellomycota"      "Chytridiomycota"   
##  [16] "Rozellomycota"      "Rozellomycota"      "Rozellomycota"     
##  [19] "Rozellomycota"      "Rozellomycota"      "Rozellomycota"     
##  [22] "Rozellomycota"      "Rozellomycota"      "Rozellomycota"     
##  [25] "Rozellomycota"      "Rozellomycota"      "Rozellomycota"     
##  [28] "Basidiomycota"      "Basidiomycota"      "Basidiomycota"     
##  [31] "Rozellomycota"      "Rozellomycota"      "Basidiomycota"     
##  [34] "Basidiomycota"      "Kickxellomycota"    "Rozellomycota"     
##  [37] "Basidiomycota"      "Basidiomycota"      "Rozellomycota"     
##  [40] "Basidiomycota"      "Basidiomycota"      "Rozellomycota"     
##  [43] "Chytridiomycota"    "Rozellomycota"      "Rozellomycota"     
##  [46] "Rozellomycota"      "Rozellomycota"      "Rozellomycota"     
##  [49] "Basidiomycota"      "Rozellomycota"      "Rozellomycota"     
##  [52] "Chytridiomycota"    "Rozellomycota"      "Rozellomycota"     
##  [55] "Chytridiomycota"    "Chytridiomycota"    "Chytridiomycota"   
##  [58] "Chytridiomycota"    "Basidiomycota"      "Rozellomycota"     
##  [61] "Rozellomycota"      "Chytridiomycota"    "Rozellomycota"     
##  [64] "Rozellomycota"      "Rozellomycota"      "Basidiomycota"     
##  [67] "Basidiomycota"      "Rozellomycota"      "Rozellomycota"     
##  [70] "Rozellomycota"      "Rozellomycota"      "Rozellomycota"     
##  [73] "Rozellomycota"      "Rozellomycota"      "Rozellomycota"     
##  [76] "Rozellomycota"      "Rozellomycota"      "Rozellomycota"     
##  [79] "Rozellomycota"      "Rozellomycota"      "Chytridiomycota"   
##  [82] "Rozellomycota"      "Rozellomycota"      "Rozellomycota"     
##  [85] "Rozellomycota"      "Rozellomycota"      "Rozellomycota"     
##  [88] "Chytridiomycota"    "Rozellomycota"      "Rozellomycota"     
##  [91] "Basidiomycota"      "Rozellomycota"      "Rozellomycota"     
##  [94] "Rozellomycota"      "Rozellomycota"      "Basidiomycota"     
##  [97] "Rozellomycota"      "Rozellomycota"      "Rozellomycota"     
## [100] "Chytridiomycota"    "Rozellomycota"      "Rozellomycota"     
## [103] "Basidiomycota"      "Chytridiomycota"    "Rozellomycota"     
## [106] "Basidiomycota"      "Glomeromycota"      "Rozellomycota"     
## [109] "Rozellomycota"      "Rozellomycota"      "Rozellomycota"     
## [112] "Rozellomycota"      "Rozellomycota"      "Rozellomycota"     
## [115] "Rozellomycota"      "Rozellomycota"      "Chytridiomycota"   
## [118] "Basidiomycota"      "Rozellomycota"      "Olpidiomycota"     
## [121] "Chytridiomycota"    "Rozellomycota"      "Monoblepharomycota"
tax$class <- ifelse(tax$class == "unclassified", paste(tax$phylum, sep = "_", "unclassified"), as.character(tax$class))   

And same for all rest ranks, but I have to replace the “_unclassified_unclassified” with “_unclassified” afterwards

order

tax$order <- ifelse(tax$order == "unclassified", paste(tax$class, sep = "_", "unclassified"), as.character(tax$order))   
tax[] <- lapply(tax, gsub, pattern = "unclassified_unclassified", replacement = "unclassified", fixed = TRUE)

family

tax$family <- ifelse(tax$family == "unclassified", paste(tax$order, sep = "_", "unclassified"), as.character(tax$family))   
tax[] <- lapply(tax, gsub, pattern = "unclassified_unclassified", replacement = "unclassified", fixed = TRUE)

genus

tax$genus <- ifelse(tax$genus == "unclassified", paste(tax$family, sep = "_", "unclassified"), as.character(tax$genus))   
tax[] <- lapply(tax, gsub, pattern = "unclassified_unclassified", replacement = "unclassified", fixed = TRUE)

species

tax$species <- ifelse(tax$species == "unclassified", paste(tax$genus, sep = "_", "unclassified"), as.character(tax$species))   
tax[] <- lapply(tax, gsub, pattern = "unclassified_unclassified", replacement = "unclassified", fixed = TRUE)

8. Save

Save the new modified ps as the “final version”

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

tax_table(ps) <- tax_table(as.matrix(tax))

save(ps, file='ps_FINAL')

ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 22 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]

STEP 2: Calculating diversities

library("ggpubr")
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 22 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
meta <- meta(ps)

Richness figures

1. Change metadata numerics

# Column indices to be converted numeric
i <- c(6, 9:22)
meta[, i] <- apply(meta[, i], 2, function(x) as.numeric(as.character(x)))

meta$depth_numerical[meta$depth_numerical == 0] <- 5
meta$depth_numerical[meta$depth_numerical == 10] <- 15
meta$depth_numerical[meta$depth_numerical == 20] <- 25
meta$depth_numerical[meta$depth_numerical == 30] <- 35
meta$depth_numerical[meta$depth_numerical == 40] <- 60

# change natural_grass to meadow
 meta <- data.frame(lapply(meta, function(x) {gsub("Natural_grass", "meadow", x)}))
 rownames(meta) <- meta$sampleID

sample_data(ps) <- sample_data(meta)                

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
save(ps, file='ps_FINAL')

2. Calculate diversities

2.1. First without singletons

div <- microbiome::alpha(ps, index = "all")

# Assign the estimated diversity to sample metadata
sample_data(ps)$observed <- div$observed
sample_data(ps)$chao1 <- div$chao1
sample_data(ps)$shannon <- div$diversity_shannon

meta <- meta(ps)

meta$sample_type <- as.factor(meta$sample_type)

meta$sample_type <- factor(meta$sample_type, levels = c("forest", "meadow", "organic", "conventional"))

# Column indices to be converted numeric
i <- c(6, 9:25)
meta[, i] <- apply(meta[, i], 2, function(x) as.numeric(as.character(x)))

sample_data(ps) <- sample_data(meta)                

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
save(ps, file='ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 25 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]

2.2 with singletons

I will use the one without singletons, but just for comparison, I also calculate with singletons

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
load('ps_phyloseq_with_sng')

div <- microbiome::alpha(ps_sng, index = "all")

# Assign the estimated diversity to sample metadata
sample_data(ps)$observed_sng <- div$observed
sample_data(ps)$chao1_sng <- div$chao1
sample_data(ps)$shannon_sng <- div$diversity_shannon

meta <- meta(ps)
    
save(ps, file='ps_FINAL')

3. plot diversity

color palette

# create your own color palette for sample types
MyPalette <- c(forest = "#1167b1", meadow = "#fbc02d", organic = "#8a8a8a", conventional =  "#b71c1c")

plot depthwise

# OTU richness
OTU_rich <- meta %>%
  dplyr::group_by(sample_type, depth_numerical) %>%
  dplyr::summarise(mean = mean(observed, na.rm = TRUE), se = (sd(observed, na.rm = TRUE)/sqrt(length((observed))))) %>%
  dplyr::ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=3, position=position_dodge(1.2)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text = element_text(size=18),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="OTU richness") +
  labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)

OTU_rich

# shannon
shannon <- meta %>%
  dplyr::group_by(sample_type, depth_numerical) %>%
  dplyr::summarise(mean = mean(shannon, na.rm = TRUE), se = (sd(shannon, na.rm = TRUE)/sqrt(length((shannon))))) %>%
  dplyr::ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=3, position=position_dodge(1.2)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text = element_text(size=18),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="Shannon") +
  labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)

shannon

3.2. plot with sng

plot depthwise

# OTU richness
OTU_rich_sng <- meta %>%
  dplyr::group_by(sample_type, depth_numerical) %>%
  dplyr::summarise(mean = mean(observed_sng, na.rm = TRUE), se = (sd(observed_sng, na.rm = TRUE)/sqrt(length((observed_sng))))) %>%
  dplyr::ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=3, position=position_dodge(1.2)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text = element_text(size=18),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="OTU richness") +
  labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)

OTU_rich_sng

# shannon
shannon_sng <- meta %>%
  dplyr::group_by(sample_type, depth_numerical) %>%
  dplyr::summarise(mean = mean(shannon_sng, na.rm = TRUE), se = (sd(shannon_sng, na.rm = TRUE)/sqrt(length((shannon_sng))))) %>%
  dplyr::ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=3, position=position_dodge(1.2)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text = element_text(size=18),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="Shannon") +
  labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)

shannon


STEP 3: Annotating OTUs with FUNGuild

library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library("RColorBrewer") # nice color options
library(multcompView)
library(rcompanion)
library(car)
library(multcomp)
library(stringr)
library(ggrepel)
library(MicEco)
library(metagMisc)

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]

1. Load and save FUNGuild

#FG <- parse_funguild()
#attr(FG, "DownloadDate")  # Check when the database was downloaded
#setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
#save FG
#write.csv2(FG, file = "FUNGuild_31_05_2024.csv", row.names = FALSE)

I have previously uploaded the FUNGuild database and will use that version here for continuity. I used a version downloaded: “Fri May 31 19:45:41 2024”

2. Annotate FUNGuild at different taxonomic levels

I need to annotate separately at different taxonomic level

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\YONI_ITS_analyses\\R_ITS_yoni\\Analyses_final\\RE_ANNOTATION_2024')

FG <- read.csv2("FUNGuild_31_05_2024.csv")

# what different levels there are:
unique(FG$taxonomicLevel)
## [1] "Genus"      "Species"    "Variety"    "Family"     "Order"     
## [6] "Phylum"     "Form"       "Subspecies"

I will annotate with Species, Genus, Family, Order, Phylum level

2.1. species

fg <- FG[FG$taxonomicLevel == "Species", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)

# fg_sp doesn't have underscores, so lets add them
fg$taxon <- sub(" ", "_", fg$taxon)
# merge tables
colnames(fg)[1] <- "species"
FG_tax_table <- merge(tax_table, fg, by = "species", all.x = TRUE)


# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA

# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[, c(8, 12, 13)]

# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")

# change column names
colnames(FG_tax_table)[1] <- "trophicMode_sp"
colnames(FG_tax_table)[2] <- "guild_sp"

# save with new name
FUNGuild_sp <- FG_tax_table

2.2. Genus

fg <- FG[FG$taxonomicLevel == "Genus", ]      
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)

# merge tables
colnames(fg)[1] <- "genus"
FG_tax_table <- merge(tax_table, fg, by = "genus", all.x = TRUE)


# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA

# only keep the OTU, TrophicMode and guild
# and OTU as row names
FG_tax_table <- FG_tax_table[, c(8, 12, 13)]

# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")


# change column names
colnames(FG_tax_table)[1] <- "trophicMode_gen"
colnames(FG_tax_table)[2] <- "guild_gen"

# save with new name
FUNGuild_gen <- FG_tax_table

2.3. Family

fg <- FG[FG$taxonomicLevel == "Family", ]      
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)

# merge tables
colnames(fg)[1] <- "family"
FG_tax_table <- merge(tax_table, fg, by = "family", all.x = TRUE)

# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA

# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[, c(8, 12, 13)]

# and taxon as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")

# change column names
colnames(FG_tax_table)[1] <- "trophicMode_fam"
colnames(FG_tax_table)[2] <- "guild_fam"

# save with new name
FUNGuild_fam <- FG_tax_table

2.4. Order

fg <- FG[FG$taxonomicLevel == "Order", ]      
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)

# merge tables
colnames(fg)[1] <- "order"
FG_tax_table <- merge(tax_table, fg, by = "order", all.x = TRUE)

# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA

# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[, c(8, 12, 13)]

# and taxon as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")

# change column names
colnames(FG_tax_table)[1] <- "trophicMode_ord"
colnames(FG_tax_table)[2] <- "guild_ord"

# save with new name
FUNGuild_ord <- FG_tax_table

2.5. Phylum

fg <- FG[FG$taxonomicLevel == "Phylum", ]      
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)

# merge tables
colnames(fg)[1] <- "phylum"
FG_tax_table <- merge(tax_table, fg, by = "phylum", all.x = TRUE)

# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA

# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[, c(8, 12, 13)]

# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")

# change column names
colnames(FG_tax_table)[1] <- "trophicMode_phy"
colnames(FG_tax_table)[2] <- "guild_phy"

# save with new name
FUNGuild_phy <- FG_tax_table

2.6. combine all annotations

species Genus Family Order Phylum

x <- left_join(rownames_to_column(FUNGuild_sp), rownames_to_column(FUNGuild_gen), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_fam), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_ord), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_phy), by = "rowname")
# get the value from another column if NA: now from genus 
y <- x %>%
  mutate(trophicMode_sp = coalesce(trophicMode_sp,trophicMode_gen))

# get the value from another column if NA: now from genus
y <- y %>%
  mutate(guild_sp = coalesce(guild_sp,guild_gen))

# same for family
# get the value from another column if NA
y <- y %>%
  mutate(trophicMode_sp = coalesce(trophicMode_sp,trophicMode_fam))

# get the value from another column if NA
y <- y %>%
  mutate(guild_sp = coalesce(guild_sp,guild_fam))

# same for order
# get the value from another column if NA
y <- y %>%
  mutate(trophicMode_sp = coalesce(trophicMode_sp,trophicMode_ord))

# get the value from another column if NA
y <- y %>%
  mutate(guild_sp = coalesce(guild_sp,guild_ord))

# same for phylum
# get the value from another column if NA
y <- y %>%
  mutate(trophicMode_sp = coalesce(trophicMode_sp,trophicMode_phy))

# get the value from another column if NA
y <- y %>%
  mutate(guild_sp = coalesce(guild_sp,guild_phy))

# then rename the sp columns
colnames(y)[2] <- "trophicMode"
colnames(y)[3] <- "guild"


# remove the rest of the columns
y <- y[, -c(4:11)]

# rownames
y2 <- y[,-1]
rownames(y2) <- y[,1]
# how many of different trophic modes and NAs?
table(y2$trophicMode, useNA = "ifany")
## 
##                         Pathotroph   Pathotroph-Pathotroph-Saprotroph 
##                                 20                                  7 
##  Pathotroph-Saprotroph-Symbiotroph                         Saprotroph 
##                                  7                                 37 
##                        Symbiotroph                         Pathotroph 
##                                148                               1480 
##              Pathotroph-Saprotroph  Pathotroph-Saprotroph-Symbiotroph 
##                               1973                                977 
##             Pathotroph-Symbiotroph                         Saprotroph 
##                                 98                               4805 
##             Saprotroph-Symbiotroph                        Symbiotroph 
##                               1179                                636 
##                               <NA> 
##                               9243

So approximately 44.8% are NAs

3. Make initial ps_FG

y3 <- left_join(rownames_to_column(y2), rownames_to_column(tax_table), by = "rowname")
row.names(y3) <- y3$rowname
y3 <- y3[, -1]
ps_FG <- phyloseq(otu_table(ps), tax_table(as.matrix(y3)), sample_data(ps))
ps_FG
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 10 taxonomic ranks ]
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

save(ps_FG, file = 'ps_FG_with_NAs')

20610 taxa; contains all the NAs

4. Remove empty spaces etc.

and remove also “|”

FG_tax_table <- as.data.frame(tax_table(ps_FG)) 
FG_tax_table$trophicMode <- gsub(" ", "", FG_tax_table$trophicMode, fixed = TRUE)
FG_tax_table$guild <- gsub("|", "", FG_tax_table$guild, fixed = TRUE)

5. Define AMFs, Ectomycorrhizal and Potential Plant pathogens

I will add a column “FUNGuild” where I curate the some symbiotroph and pathotroph fungi according to my research interests. I am especially interested of AMF as thay are important mycorrhiza in arable soils, but I alo wasnt to separate Ectomycorrhiza as they are important in forest. This leaves one more relatively big symbiotrophic guild, endophytes, which I will also specify. So the following curation will be done for FUNGuild column:

FG_tax_table <- FG_tax_table %>%  
   mutate(FG = case_when(grepl("Ectomycorrhizal", guild) & trophicMode=="Symbiotroph" ~ "Ectomycorrhizal", grepl("Arbuscular", guild) ~ "Arbuscular Mycorrhizal", guild == "Endophyte" & trophicMode=="Symbiotroph" ~ "Endophyte", guild=="Plant Pathogen" & trophicMode=="Pathotroph"~ "Plant Pathogen"))
FG_tax_table <- FG_tax_table %>%
  mutate(FUNGuild = case_when(FG == "Ectomycorrhizal" |  FG == "Arbuscular Mycorrhizal" | FG == "Endophyte" | FG == "Plant Pathogen" ~ FG, guild != "Ectomycorrhizal" |  FG != "Arbuscular Mycorrhizal" |  FG != "Endophyte" | FG != "Plant Pathogen" ~ FG_tax_table$trophicMode))


# remove the FG column

FG_tax_table <- FG_tax_table[, c(1:10, 12)]

Check the different written forms, if I have empty spaces?

unique(FG_tax_table$trophicMode)
## [1] NA                                  "Saprotroph-Symbiotroph"           
## [3] "Symbiotroph"                       "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Saprotroph"             "Pathotroph"                       
## [7] "Saprotroph"                        "Pathotroph-Symbiotroph"           
## [9] "Pathotroph-Pathotroph-Saprotroph"
#unique(FG_tax_table$guild)
unique(FG_tax_table$FUNGuild)
##  [1] NA                                  "Saprotroph-Symbiotroph"           
##  [3] "Arbuscular Mycorrhizal"            "Pathotroph-Saprotroph-Symbiotroph"
##  [5] "Pathotroph-Saprotroph"             "Pathotroph"                       
##  [7] "Saprotroph"                        "Endophyte"                        
##  [9] "Ectomycorrhizal"                   "Pathotroph-Symbiotroph"           
## [11] "Plant Pathogen"                    "Symbiotroph"                      
## [13] "Pathotroph-Pathotroph-Saprotroph"

There is a bug in the FUNGuild data, so that in addition to Pathotroph-Saprotroph there is the “wrong” one Pathotroph-Pathotroph-Saprotroph etc. I will correct these.

Also I will name Symbiotroh into Other Symbiotroph as I have extracted the AMF, EcM etc. from the symbiotroph trophic mode

# change some names
FG_tax_table$trophicMode[FG_tax_table$trophicMode=="Pathotroph-Pathotroph-Saprotroph"] <- "Pathotroph-Saprotroph"

FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Symbiotroph"] <- "Other Symbiotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Pathotroph"] <- "Other Pathotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Pathotroph-Pathotroph-Saprotroph"] <- "Pathotroph-Saprotroph"


# reorder
FG_tax_table <- FG_tax_table[, c(1:2, 11, 3:10)]

Check again

unique(FG_tax_table$trophicMode)
## [1] NA                                  "Saprotroph-Symbiotroph"           
## [3] "Symbiotroph"                       "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Saprotroph"             "Pathotroph"                       
## [7] "Saprotroph"                        "Pathotroph-Symbiotroph"
#unique(FG_tax_table$guild)
unique(FG_tax_table$FUNGuild)
##  [1] NA                                  "Saprotroph-Symbiotroph"           
##  [3] "Arbuscular Mycorrhizal"            "Pathotroph-Saprotroph-Symbiotroph"
##  [5] "Pathotroph-Saprotroph"             "Other Pathotroph"                 
##  [7] "Saprotroph"                        "Endophyte"                        
##  [9] "Ectomycorrhizal"                   "Pathotroph-Symbiotroph"           
## [11] "Plant Pathogen"                    "Other Symbiotroph"

6. Save final ps_FG and check NA proportions

ps_FG <- phyloseq(otu_table(ps), tax_table(as.matrix(FG_tax_table)), sample_data(ps))
ps_FG
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 11 taxonomic ranks ]
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

save(ps_FG, file = 'ps_FG_with_NAs')

Check proportions of NAs

# lets calculate how big percentage of sequences where assigned (are not unknown)

ps_FG_RA <- transform(ps_FG, "compositional")

FG_RA_TmG <- aggregate_rare(ps_FG_RA, level = 'FUNGuild', detection = 0/100, prevalence = 0/100)
FG_RA_TmG
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 12 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 12 taxa by 2 taxonomic ranks ]
glom <- tax_glom(FG_RA_TmG, taxrank = 'FUNGuild')
percentages <- psmelt(glom)
df <- percentages %>%
  group_by(OTU) %>%
  summarise_at(vars(Abundance), list(name = mean))

df
## # A tibble: 12 × 2
##    OTU                                   name
##    <chr>                                <dbl>
##  1 Arbuscular Mycorrhizal            0.0240  
##  2 Ectomycorrhizal                   0.0305  
##  3 Endophyte                         0.0206  
##  4 Other Pathotroph                  0.0323  
##  5 Other Symbiotroph                 0.000308
##  6 Pathotroph-Saprotroph             0.0850  
##  7 Pathotroph-Saprotroph-Symbiotroph 0.0324  
##  8 Pathotroph-Symbiotroph            0.00580 
##  9 Plant Pathogen                    0.0225  
## 10 Saprotroph                        0.292   
## 11 Saprotroph-Symbiotroph            0.150   
## 12 Unknown                           0.305

Note, although 44.8% of OTUs were not assigned (info given above), 30.5% of reads were not assigned

Check also the percentage of NAs within the four sample types: forest, meadow, organic and conventional. Do they differ?

FG_RA_Tm <- aggregate_rare(ps_FG_RA, level = 'trophicMode', detection = 0/100, prevalence = 0/100)

glom <- tax_glom(FG_RA_Tm, taxrank = 'trophicMode')
percentages <- psmelt(glom)
df <- percentages %>%
  group_by(sample_type, OTU) %>%
  summarise_at(vars(Abundance), list(name = mean))
df
## # A tibble: 32 × 3
## # Groups:   sample_type [4]
##    sample_type OTU                                   name
##    <fct>       <chr>                                <dbl>
##  1 forest      Pathotroph                        0.0104  
##  2 forest      Pathotroph-Saprotroph             0.0841  
##  3 forest      Pathotroph-Saprotroph-Symbiotroph 0.0315  
##  4 forest      Pathotroph-Symbiotroph            0.000181
##  5 forest      Saprotroph                        0.154   
##  6 forest      Saprotroph-Symbiotroph            0.176   
##  7 forest      Symbiotroph                       0.261   
##  8 forest      Unknown                           0.283   
##  9 meadow      Pathotroph                        0.0266  
## 10 meadow      Pathotroph-Saprotroph             0.0568  
## # ℹ 22 more rows

management NAs Forest 28.3 meadow 23.8 organic 31.8 conventional 35.9

7. Remove NAs

# this is a robust way of removing NAs from the phyloseq object. 
## aggregation makes NAs into "Unknown"
ps_FG_x <- aggregate_rare(ps_FG, level = 'guild', detection = 0, prevalence = 0)
ps_FG_x
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 145 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 145 taxa by 2 taxonomic ranks ]
# 145  taxa

# and "Unknown" can be removed like this
allTaxa = taxa_names(ps_FG_x)
badTaxa = c("Unknown")
myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
ps_FG_x_pruned <- prune_taxa(myTaxa, ps_FG_x)
ps_FG_x_pruned
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 144 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 144 taxa by 2 taxonomic ranks ]
# 144 taxa

8. Calculate trophic mode proportions

lets calculate how big percentage of sequences belongs to which trophic mode when non-assigned are not included

x <- aggregate_rare(ps_FG, level = 'trophicMode', detection = 0, prevalence = 0)
x
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 8 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 8 taxa by 1 taxonomic ranks ]
# 8 taxa

# remove "Unknown"
allTaxa = taxa_names(x)
badTaxa = c("Unknown")
myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
x <- prune_taxa(myTaxa, x)
x
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 7 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 7 taxa by 1 taxonomic ranks ]
# 7 taxa

x2 <- transform(x, 'compositional')

glom <- tax_glom(x2, taxrank = 'trophicMode')
percentages <- psmelt(glom)
df <- percentages %>%
  group_by(OTU) %>%
  summarise_at(vars(Abundance), list(name = mean))
df
## # A tibble: 7 × 2
##   OTU                                 name
##   <chr>                              <dbl>
## 1 Pathotroph                        0.0786
## 2 Pathotroph-Saprotroph             0.132 
## 3 Pathotroph-Saprotroph-Symbiotroph 0.0453
## 4 Pathotroph-Symbiotroph            0.0105
## 5 Saprotroph                        0.402 
## 6 Saprotroph-Symbiotroph            0.218 
## 7 Symbiotroph                       0.114

RESULTS STEP 1: VENN diagrams and number of OTUs

Here, I will check the number of reads and OTUs and construct venn-diagrams for sample types (or management type) and soil layers.

library("ggpubr")
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
library("MicEco")

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
ps_RA <- microbiome::transform(ps, "compositional")

meta <- meta(ps)

1. How many reads and OTUs?

summarize_phyloseq(ps)
## [[1]]
## [1] "1] Min. number of reads = 44"
## 
## [[2]]
## [1] "2] Max. number of reads = 410851"
## 
## [[3]]
## [1] "3] Total number of reads = 11539503"
## 
## [[4]]
## [1] "4] Average number of reads = 82425.0214285714"
## 
## [[5]]
## [1] "5] Median number of reads = 79653.5"
## 
## [[6]]
## [1] "7] Sparsity = 0.928178415470992"
## 
## [[7]]
## [1] "6] Any OTU sum to 1 or less? NO"
## 
## [[8]]
## [1] "8] Number of singletons = 0"
## 
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n        (i.e. exactly one read detected across all samples)0"
## 
## [[10]]
## [1] "10] Number of sample variables are: 28"
## 
## [[11]]
##  [1] "sampleID"            "plot"                "sampling_position"  
##  [4] "actual_sample_depth" "depth"               "depth_numerical"    
##  [7] "vegetation"          "sample_type"         "root_mgg"           
## [10] "pH_H2O"              "EC_uScm"             "C_g_per_kg"         
## [13] "N_gkg"               "TP_gkg"              "Alox_mmolkg"        
## [16] "Feox_mmolkg"         "oxides_mmolkg"       "PH2O_mgkg"          
## [19] "Porg_mgkg"           "DOC_mgkg"            "Pinorg_mgkg"        
## [22] "C_per_N"             "observed"            "chao1"              
## [25] "shannon"             "observed_sng"        "chao1_sng"          
## [28] "shannon_sng"

We obtained 11662127 fungal reads which clustered into 31714 OTUs in the 140 samples.

2. VENN

soil management

MyPalette <- c(forest = "#1167b1", meadow = "#fbc02d", organic = "#8a8a8a", conventional =  "#b71c1c")

# Note, relative is false, because I am using an alredy relative abundance transformed phyloseq object.
venn_no_prev <- ps_venn(
  ps_RA,
  "sample_type",
  fraction = 0,
  weight = FALSE,
  relative = FALSE,
  plot = TRUE,
  quantities = list(cex=0.7),
  fill = MyPalette
)
venn_no_prev

How many OTUs are shared by all sample types? I will pick the shared by all from the figure and divide it with the total OTU number to get the percentage

shared_by_all <- 2570
total_OTUs <- 20610
  
shared_by_all/total_OTUs
## [1] 0.1246967

12.5% of OTUs were shared by all management

soil layer

meta$new_depth <- meta$depth

meta$new_depth <- gsub("...", "-", meta$new_depth, fixed = TRUE)

meta$new_depth[meta$new_depth=="40-"] <- "40-80"

meta$new_depth[meta$new_depth=="0-10"] <- "0-10 cm"
meta$new_depth[meta$new_depth=="10-20"] <- "10-20 cm"
meta$new_depth[meta$new_depth=="20-30"] <- "20-30 cm"
meta$new_depth[meta$new_depth=="30-40"] <- "30-40 cm"
meta$new_depth[meta$new_depth=="40-80"] <- "40-80 cm"

sample_data(ps) <- sample_data(meta)

ps_RA <- transform(ps, "compositional")
  
# create your own color palette for sample types
MyPalette <- list(c('0-10 cm' = "#387212", '10-20 cm' = "#ADC476", '20-30 cm' = "#D8D2BA",'30-40 cm' = "#907852", '40-80 cm' = "#6A4C3A"))

venn_DEPTH <- ps_venn(
  ps_RA,
  "new_depth",
  fraction = 0,
  weight = FALSE,
  relative = TRUE,
  quantities = list(cex=0.7),
  plot = TRUE,
  fill = MyPalette[[1]]
)
venn_DEPTH

Again, how many OTUs are shared by all layers?

shared_by_all <- 1007
total_OTUs <- 20610
  
shared_by_all/total_OTUs
## [1] 0.04885978

4.9% of OTUs were shared by all layers

OTU allocation to layers

How much of the OTUs in the dataset were found in the first, first two or first three soil layers?

first three soil layers

ps_x <- subset_samples(ps, depth!="30...40" & depth!="40...")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_x),
              MARGIN = ifelse(taxa_are_rows(ps_x), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_x = prune_taxa((prev0 > 0), ps_x)
ps_x
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20130 taxa and 84 samples ]
## sample_data() Sample Data:       [ 84 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 20130 taxa by 7 taxonomic ranks ]

In the first three layers: 20130 OTUs

OTUs <- 20130
total_OTUs <- 20610
  
OTUs/total_OTUs
## [1] 0.9767103

97.7% of all OTUs were found in the first 3 soil layers

How much of the OTUs in the dataset were found in the first, first two or first three soil layers?

first two soil layers

ps_x <- subset_samples(ps, depth!="20...30" & depth!="30...40" & depth!="40...")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_x),
              MARGIN = ifelse(taxa_are_rows(ps_x), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_x = prune_taxa((prev0 > 0), ps_x)
ps_x
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 18392 taxa and 56 samples ]
## sample_data() Sample Data:       [ 56 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 18392 taxa by 7 taxonomic ranks ]

In the first three layers: 18392 OTUs

OTUs <- 18392
total_OTUs <- 20610
  
OTUs/total_OTUs
## [1] 0.8923823

89.2% of all OTUs were found in the first 2 soil layers

first soil layer

ps_x <- subset_samples(ps, depth=="0...10")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_x),
              MARGIN = ifelse(taxa_are_rows(ps_x), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_x = prune_taxa((prev0 > 0), ps_x)
ps_x
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 14737 taxa and 28 samples ]
## sample_data() Sample Data:       [ 28 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 14737 taxa by 7 taxonomic ranks ]
OTUs <- 14737
total_OTUs <- 20610
  
OTUs/total_OTUs
## [1] 0.7150412

71.5% of all OTUs were found in the first soil layer

combine the figures

library(ggpubr)

fig <- ggarrange(venn_no_prev, venn_DEPTH,
                     ncol = 2, nrow = 1)

fig


RESULTS STEP 2: PCoA with OTUs and soil properties

library("ggpubr")
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
library(goeveg)
library(metagMisc)

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
meta <- meta(ps)

1. Root biomass log10 transformatio

hist(meta$root_mgg)

#perform Shapiro-Wilk Test
shapiro.test(meta$root_mgg)
## 
##  Shapiro-Wilk normality test
## 
## data:  meta$root_mgg
## W = 0.35847, p-value < 2.2e-16
#log transformation

meta <- meta %>%
  mutate(log_root = log10(root_mgg))

hist(meta$log_root)

#perform Shapiro-Wilk Test
shapiro.test(meta$log_root)
## 
##  Shapiro-Wilk normality test
## 
## data:  meta$log_root
## W = 0.98109, p-value = 0.04971
#now is normally distributed since p value is more than 0.05

save to ps

sample_data(ps) <- sample_data(meta)
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
save(ps, file='ps_FINAL')

2. Bray distance and PCoA

ps_RA <- microbiome::transform(ps, "compositional")

To visualize beta diversity, I will do a PCoA which is metric instead of e.g. non-metric NMDS

I will be following somewhat this tutorial:

Joey Bernhardt

OTU = as(otu_table(ps_RA), "matrix")
# transpose if necessary
if(taxa_are_rows(ps_RA)){OTU <- t(OTU)}
# Coerce to data.frame
OTU = as.data.frame(OTU)
OTU <- as.matrix(OTU)

bray_dist <- vegan::vegdist(OTU, method="bray")
str(bray_dist)
##  'dist' Named num [1:9730] 0.367 0.694 0.967 0.723 0.47 ...
##  - attr(*, "maxdist")= num 1
##  - attr(*, "Size")= int 140
##  - attr(*, "Labels")= chr [1:140] "CG9.1_0to10" "CG9.1_10to20" "CG9.1_20to30" "CG9.1_30to40" ...
##  - attr(*, "Diag")= logi FALSE
##  - attr(*, "Upper")= logi FALSE
##  - attr(*, "method")= chr "bray"
##  - attr(*, "call")= language vegan::vegdist(x = OTU, method = "bray")
# use k = 3 so we'll get calculations for three axes
pcoa <- cmdscale(bray_dist, eig=TRUE, k = 3)
ordiplot(scores(pcoa),display="sites", type="points")

Ordination with axes 1 and 2

Let’s first make PCoA ordination with axes 1 and 2, and later for 1 and 3.

Env. variables

# Post-Hoc Projections of environmental variables
# envfit in vegan projects points onto vectors that have maximum correlation with corresponding environmental variables
pcoa.env12 <- envfit(pcoa, meta[ ,c("pH_H2O", "C_g_per_kg", "N_gkg", "TP_gkg", "sample_type", "depth_numerical", "DOC_mgkg", "Pinorg_mgkg", "Porg_mgkg", "log_root", "C_per_N", "Feox_mmolkg", "Alox_mmolkg")], na.rm = TRUE, choices=c(1:2), permutations = 999)

# main effects
pcoa.env12
## 
## ***VECTORS
## 
##                     Dim1     Dim2     r2 Pr(>r)    
## pH_H2O           0.89803 -0.43993 0.4996  0.001 ***
## C_g_per_kg      -0.92647  0.37636 0.5577  0.001 ***
## N_gkg           -0.95072  0.31005 0.5692  0.001 ***
## TP_gkg          -0.93787 -0.34699 0.5026  0.001 ***
## depth_numerical  0.98087  0.19465 0.5300  0.001 ***
## DOC_mgkg        -0.76599  0.64286 0.4138  0.001 ***
## Pinorg_mgkg      0.17893 -0.98386 0.0740  0.003 ** 
## Porg_mgkg       -0.99894  0.04605 0.5753  0.001 ***
## log_root        -0.80130  0.59827 0.4884  0.001 ***
## C_per_N         -0.97551  0.21996 0.4224  0.001 ***
## Feox_mmolkg     -0.84349  0.53715 0.4468  0.001 ***
## Alox_mmolkg     -0.67701  0.73598 0.3176  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
## 
## ***FACTORS:
## 
## Centroids:
##                            Dim1    Dim2
## sample_typeforest        0.0679  0.1867
## sample_typemeadow       -0.0229  0.1780
## sample_typeorganic      -0.0098 -0.1293
## sample_typeconventional  0.0006 -0.1050
## 
## Goodness of fit:
##                 r2 Pr(>r)    
## sample_type 0.2013  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
## 
## 1 observation deleted due to missingness
ef12.adj <- pcoa.env12
pvals.adj <- p.adjust (pcoa.env12$vectors$pvals, method = 'bonferroni')
ef12.adj$vectors$pvals <- pvals.adj
ef12.adj
## 
## ***VECTORS
## 
##                     Dim1     Dim2     r2 Pr(>r)  
## pH_H2O           0.89803 -0.43993 0.4996  0.012 *
## C_g_per_kg      -0.92647  0.37636 0.5577  0.012 *
## N_gkg           -0.95072  0.31005 0.5692  0.012 *
## TP_gkg          -0.93787 -0.34699 0.5026  0.012 *
## depth_numerical  0.98087  0.19465 0.5300  0.012 *
## DOC_mgkg        -0.76599  0.64286 0.4138  0.012 *
## Pinorg_mgkg      0.17893 -0.98386 0.0740  0.036 *
## Porg_mgkg       -0.99894  0.04605 0.5753  0.012 *
## log_root        -0.80130  0.59827 0.4884  0.012 *
## C_per_N         -0.97551  0.21996 0.4224  0.012 *
## Feox_mmolkg     -0.84349  0.53715 0.4468  0.012 *
## Alox_mmolkg     -0.67701  0.73598 0.3176  0.012 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
## 
## ***FACTORS:
## 
## Centroids:
##                            Dim1    Dim2
## sample_typeforest        0.0679  0.1867
## sample_typemeadow       -0.0229  0.1780
## sample_typeorganic      -0.0098 -0.1293
## sample_typeconventional  0.0006 -0.1050
## 
## Goodness of fit:
##                 r2 Pr(>r)    
## sample_type 0.2013  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
## 
## 1 observation deleted due to missingness

To plot (classical) mds (which is equivalent to PCoA) with ggplot a new datasheet needs to be created which contains the x,y points for each site. You can do this by calling the scores of you mds.

site.scrs <- as.data.frame(scores(pcoa, display = "sites")) #save pcoa results into dataframe
site.scrs <- cbind(site.scrs, soil_type = meta$sample_type) #add grouping variable "soil_type" to dataframe
site.scrs <- cbind(site.scrs, depth = meta$depth) #add grouping variable of depth

head(site.scrs)
##                     Dim1        Dim2         Dim3    soil_type   depth
## CG9.1_0to10  -0.32486951 -0.16939677  0.057117223 conventional  0...10
## CG9.1_10to20 -0.35683666 -0.18451022  0.010039481 conventional 10...20
## CG9.1_20to30  0.10483906 -0.26374886 -0.239911662 conventional 20...30
## CG9.1_30to40  0.49569905 -0.18888465 -0.132639765 conventional 30...40
## CG9.1_40to70  0.03411974 -0.01592131 -0.009914955 conventional   40...
## CG9.2_0to10  -0.24690800 -0.12969297  0.145831464 conventional  0...10

I will be following somewhat this tutorial for fitting the environmental variables etc:

jkzorz github

To show environmental extrinsic variables another datasheet needs to be created

Citation from the jkzorz github

“Extracting the required information from the envfit result is a bit more complicated. The envfit output contains information on the length of the segments for each variable. The segments are scaled to the r2 value, so that the environmental variables with a longer segment are more strongly correlated with the data than those with a shorter segment. You can extract this information with scores. Then these lengths are further scaled to fit the plot. This is done with a multiplier that is analysis specific, and can be accessed using the command ordiArrowMul(en). Below I multiply the scores by this multiplier to keep the coordinates in the correct proportion.”

Because my data contained continuous and categorical environmental variables, Im extracting the information from both separately using the vectors and factors options respectively.

categorial and continuous variables

# first categorial variables
env.scores_cat12 <- as.data.frame(scores(pcoa.env12, display = "factors"))*0.25 # multiply the envfit scores to keep them in the frame of the ordination
env.scores_cat12 <- cbind(env.scores_cat12, env.variables = rownames(env.scores_cat12)) #and then gives them their names

env.scores_cat12 <- cbind(env.scores_cat12, pval = pcoa.env12$factors$pvals) # add pvalues to dataframe
sig.env.scores_cat12 <- subset(env.scores_cat12, pval<=0.05) #subset data to show variables significant at 0.05
sig.env.scores_cat12
##                                  Dim1        Dim2           env.variables  pval
## sample_typeforest        0.0169631533  0.04667273       sample_typeforest 0.001
## sample_typemeadow       -0.0057255852  0.04448833       sample_typemeadow 0.001
## sample_typeorganic      -0.0024526659 -0.03233332      sample_typeorganic 0.001
## sample_typeconventional  0.0001453261 -0.02625329 sample_typeconventional 0.001
# all were significant


# then continous variables
env.scores_cont12 <- as.data.frame(scores(pcoa.env12, display = "vectors"))*0.25 # multiply the envfit scores to keep them in the frame of the ordination
env.scores_cont12 <- cbind(env.scores_cont12, env.variables = rownames(env.scores_cont12)) #and then gives them their names

env.scores_cont12 <- cbind(env.scores_cont12, pval = pcoa.env12$vectors$pvals) # add pvalues to dataframe
sig.env.scores_cont12 <- subset(env.scores_cont12, pval<=0.05) #subset data to show variables significant at 0.05
sig.env.scores_cont12
##                        Dim1         Dim2   env.variables  pval
## pH_H2O           0.15868133 -0.077735176          pH_H2O 0.001
## C_g_per_kg      -0.17297071  0.070265684      C_g_per_kg 0.001
## N_gkg           -0.17931771  0.058479126           N_gkg 0.001
## TP_gkg          -0.16622910 -0.061501047          TP_gkg 0.001
## depth_numerical  0.17852191  0.035426555 depth_numerical 0.001
## DOC_mgkg        -0.12318024  0.103379224        DOC_mgkg 0.001
## Pinorg_mgkg      0.01216617 -0.066897767     Pinorg_mgkg 0.003
## Porg_mgkg       -0.18942328  0.008732727       Porg_mgkg 0.001
## log_root        -0.14000008  0.104527314        log_root 0.001
## C_per_N         -0.15851066  0.035740927         C_per_N 0.001
## Feox_mmolkg     -0.14095001  0.089759046     Feox_mmolkg 0.001
## Alox_mmolkg     -0.09537901  0.103687400     Alox_mmolkg 0.001
# all were significant

species scores

A new dataset containing species data also needs to be made to look at species vectors.

# wascores computes Weighted Averages scores of species for ordination configuration or for environmental variables.
species.scores12 <- wascores(pcoa$points[,1:2], OTU)

species.scores13 <- wascores(pcoa$points[,1:3], OTU)

select OTUs: ordiselect

#  Ordiselect gives me more control for the significant OTUs to display
# I'm using 0.1 % of the most abundant and 100 % of the best fitting OTUs
# NOTE! the higher the ablim maybe more of the low diversity sample' OTUs shown..?

# The species scores of the 0.1% most abundant and 100% of the best fitting OTUs

ordis12 <- ordiselect(OTU, species.scores12, ablim = 0.001, fitlim = 1, choices = c(1,2), method = "axes", env = pcoa.env12)
## [1] "21 species selected (0.1% of total number of species)."
## [1] "All species selected which belong to the 0.1% most abundant species."
ordis12.species.scores <- species.scores12[ordis12, ]
ordis12.species.scores <- cbind(ordis12.species.scores, Species = rownames(ordis12.species.scores)) #add species names to dataframe

change into sp names

# OTUs into species names

OTU.sp <- as.data.frame(tax_table(ps_RA))
OTU.sp$OTU <- rownames(OTU.sp)
# remove other taxa
OTU.sp <- OTU.sp[ , -(1:6)]


try <- merge(data.frame(ordis12.species.scores), data.frame(OTU.sp), 
             by = 0,  all = FALSE)
#add row names
samp2 <- try[,-1]
rownames(samp2) <- try[,1]
try <- samp2
rm(samp2)
# remove the "Species"
try <- try[ , -(3)]
# remove "try"
ordis12.species.scores <- try
rm(try)

head(ordis12.species.scores)
##                          V1                 V2                        species
## OTU12776  0.126946387694976 -0.121543372900751             Clonostachys_rosea
## OTU139    0.200755588616414  0.046445493642642     Entomortierella_parvispora
## OTU13985 -0.237582025036352 0.0288145443505883            Saitozyma_podzolica
## OTU19296 -0.295916773006835 -0.149458330721953      Cladorrhinum_unclassified
## OTU20886  -0.25280082681134 -0.121756560222692 Paraphaeosphaeria_unclassified
## OTU23599  -0.22805912031261  0.169675633910535       Paraphaeosphaeria_viciae
##               OTU
## OTU12776 OTU12776
## OTU139     OTU139
## OTU13985 OTU13985
## OTU19296 OTU19296
## OTU20886 OTU20886
## OTU23599 OTU23599
# make values numeric
ordis12.species.scores$V1 <- as.numeric(ordis12.species.scores$V1)
ordis12.species.scores$V2 <- as.numeric(ordis12.species.scores$V2)

Now we have the relevant information for plotting the ordination in ggplot

MyPalette <- c(forest = "#1167b1", meadow = "#fbc02d", organic = "#8a8a8a", conventional =  "#b71c1c")

site.scrs$new_depth <- site.scrs$depth

site.scrs$new_depth <- gsub("...", "-", site.scrs$new_depth, fixed = TRUE)

site.scrs$new_depth[site.scrs$new_depth=="40-"] <- "40-80"

site.scrs$new_depth[site.scrs$new_depth=="0-10"] <- "0-10 cm"
site.scrs$new_depth[site.scrs$new_depth=="10-20"] <- "10-20 cm"
site.scrs$new_depth[site.scrs$new_depth=="20-30"] <- "20-30 cm"
site.scrs$new_depth[site.scrs$new_depth=="30-40"] <- "30-40 cm"
site.scrs$new_depth[site.scrs$new_depth=="40-80"] <- "40-80 cm"

get axis %

#First lets run the PCoA with phyloseq to get the axis percentages (plot_ordination gives axis percentages)

GP.ord <- ordinate(ps_RA, "PCoA", "bray")
p2 = plot_ordination(ps_RA, GP.ord, type="samples", color="sample_type", shape="depth")
p2

Remember to change the axis percentages accordingly below!!!

pcoa.plot <- ggplot() + geom_point(data=site.scrs, aes(Dim1, Dim2, colour = factor(site.scrs$soil_type), shape = factor(site.scrs$new_depth)), size = 6, alpha = 0.6, stroke = 1.5) + theme_cowplot() + theme(panel.background = element_rect(fill = NA, colour = "black", size = 1, linetype = "solid")) + labs(colour = "", shape = "") + theme(legend.text = element_text(size = 12), axis.text = element_text(size = 16)) + scale_colour_manual(values = MyPalette) + labs(y = "PC2 (8.0%)", x = "PC1 (20.0%)")


pcoa.plot

Add OTUs t the PCoA

First, modify the species names

unique(ordis12.species.scores$species)
##  [1] "Clonostachys_rosea"             "Entomortierella_parvispora"    
##  [3] "Saitozyma_podzolica"            "Cladorrhinum_unclassified"     
##  [5] "Paraphaeosphaeria_unclassified" "Paraphaeosphaeria_viciae"      
##  [7] "Solicoccozyma_terricola"        "Pseudeurotium_unclassified"    
##  [9] "Pseudeurotium_hygrophilum"      "Clavulina_cinerea"             
## [11] "Fusarium_asiaticum"             "Pseudogymnoascus_unclassified" 
## [13] "Pleotrichocladium_opacum"       "Leotiomycetes_unclassified"    
## [15] "Rhexocercosporidium_panacis"    "Helotiales_unclassified"       
## [17] "Pseudogymnoascus_roseus"        "Solicoccozyma_terrea"          
## [19] "Mortierella_antarctica"         "Glutinoglossum_heptaseptatum"

I want to remove the “unclassified” from the end

ordis12.species.scores$species <- gsub("_unclassified","",as.character(ordis12.species.scores$species))
pcoa.plot_OTU <- pcoa.plot + ggrepel::geom_text_repel(data = ordis12.species.scores, aes(x=V1,y=V2,label=species), alpha=0.7, cex = 3.5, direction = "both", segment.size = 0.2, max.overlaps = Inf) + theme(legend.position = c(0.88, 0.8), legend.text = element_text(size = 12)) 

pcoa.plot_OTU

Add env. variables to the PCoA

First, simplify the names

sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "pH_H2O"] <- "pH"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "C_g_per_kg"] <- "C"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "N_gkg"] <- "N"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "TP_gkg"] <- "P-tot"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "depth_numerical"] <- "depth"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "DOC_mgkg"] <- "DOC"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "Pinorg_mgkg"] <- "P-inorg"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "Porg_mgkg"] <- "P-org"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "log_root"] <- "log root"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "C_per_N"] <- "C/N"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "Feox_mmolkg"] <- "Fe-ox"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "Alox_mmolkg"] <- "Al-ox"

…then plot

p1 <- pcoa.plot_OTU +
  geom_segment(aes(x = 0, y = 0, xend = Dim1, yend = Dim2), data = sig.env.scores_cont12, size =1, alpha = 0.5, colour = "grey30") + ggrepel::geom_text_repel(data = sig.env.scores_cont12, aes(x = Dim1, y = Dim2), colour = "blue", fontface = "bold", label = sig.env.scores_cont12$env.variables, segment.size = 0.2, box.padding = unit(0.1, "lines"), point.padding = (0.1), force = 1, max.time = 30, nudge_y = 0.00, nudge_x = 0.00) 

p1

This was saved with width 1200 and height 900

PCoA ordination for axes 1 and 3

pcoa.env13 <- envfit(pcoa, meta[ ,c("pH_H2O", "C_g_per_kg", "N_gkg", "TP_gkg", "sample_type", "depth_numerical", "DOC_mgkg", "Pinorg_mgkg", "Porg_mgkg", "log_root", "C_per_N", "Feox_mmolkg", "Alox_mmolkg")], na.rm = TRUE, choices=c(1:3), permutations = 999)

pcoa.env13
## 
## ***VECTORS
## 
##                     Dim1     Dim2     Dim3     r2 Pr(>r)    
## pH_H2O           0.71582 -0.34967  0.60442 0.5947  0.001 ***
## C_g_per_kg      -0.70297  0.28453 -0.65182 0.6894  0.001 ***
## N_gkg           -0.74553  0.24218 -0.62092 0.6801  0.001 ***
## TP_gkg          -0.91468 -0.33863 -0.22064 0.5108  0.001 ***
## depth_numerical  0.82942  0.16519  0.53364 0.5935  0.001 ***
## DOC_mgkg        -0.57514  0.48141 -0.66141 0.5386  0.001 ***
## Pinorg_mgkg      0.12414 -0.67859  0.72395 0.1308  0.001 ***
## Porg_mgkg       -0.84201  0.03811 -0.53812 0.6442  0.001 ***
## log_root        -0.70696  0.52696 -0.47172 0.5404  0.001 ***
## C_per_N         -0.78189  0.17543 -0.59823 0.4935  0.001 ***
## Feox_mmolkg     -0.67315  0.42760 -0.60335 0.5371  0.001 ***
## Alox_mmolkg     -0.40124  0.43448 -0.80637 0.5726  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
## 
## ***FACTORS:
## 
## Centroids:
##                            Dim1    Dim2    Dim3
## sample_typeforest        0.0679  0.1867  0.1190
## sample_typemeadow       -0.0229  0.1780 -0.0916
## sample_typeorganic      -0.0098 -0.1293  0.0392
## sample_typeconventional  0.0006 -0.1050  0.0041
## 
## Goodness of fit:
##                 r2 Pr(>r)    
## sample_type 0.1997  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
## 
## 1 observation deleted due to missingness
ef13.adj <- pcoa.env13
pvals.adj <- p.adjust (pcoa.env13$vectors$pvals, method = 'bonferroni')
ef13.adj$vectors$pvals <- pvals.adj
ef13.adj
## 
## ***VECTORS
## 
##                     Dim1     Dim2     Dim3     r2 Pr(>r)  
## pH_H2O           0.71582 -0.34967  0.60442 0.5947  0.012 *
## C_g_per_kg      -0.70297  0.28453 -0.65182 0.6894  0.012 *
## N_gkg           -0.74553  0.24218 -0.62092 0.6801  0.012 *
## TP_gkg          -0.91468 -0.33863 -0.22064 0.5108  0.012 *
## depth_numerical  0.82942  0.16519  0.53364 0.5935  0.012 *
## DOC_mgkg        -0.57514  0.48141 -0.66141 0.5386  0.012 *
## Pinorg_mgkg      0.12414 -0.67859  0.72395 0.1308  0.012 *
## Porg_mgkg       -0.84201  0.03811 -0.53812 0.6442  0.012 *
## log_root        -0.70696  0.52696 -0.47172 0.5404  0.012 *
## C_per_N         -0.78189  0.17543 -0.59823 0.4935  0.012 *
## Feox_mmolkg     -0.67315  0.42760 -0.60335 0.5371  0.012 *
## Alox_mmolkg     -0.40124  0.43448 -0.80637 0.5726  0.012 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
## 
## ***FACTORS:
## 
## Centroids:
##                            Dim1    Dim2    Dim3
## sample_typeforest        0.0679  0.1867  0.1190
## sample_typemeadow       -0.0229  0.1780 -0.0916
## sample_typeorganic      -0.0098 -0.1293  0.0392
## sample_typeconventional  0.0006 -0.1050  0.0041
## 
## Goodness of fit:
##                 r2 Pr(>r)    
## sample_type 0.1997  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
## 
## 1 observation deleted due to missingness
# first categorial variables
env.scores_cat13 <- as.data.frame(scores(pcoa.env13, display = "factors"))*0.25
env.scores_cat13 <- cbind(env.scores_cat13, env.variables = rownames(env.scores_cat13)) #and then gives them their names

env.scores_cat13 <- cbind(env.scores_cat13, pval = pcoa.env13$factors$pvals) # add pvalues to dataframe
sig.env.scores_cat13 <- subset(env.scores_cat13, pval<=0.05) #subset data to show variables significant at 0.05



# then continous variables
env.scores_cont13 <- as.data.frame(scores(pcoa.env13, display = "vectors"))*0.25
env.scores_cont13 <- cbind(env.scores_cont13, env.variables = rownames(env.scores_cont13)) #and then gives them their names

env.scores_cont13 <- cbind(env.scores_cont13, pval = pcoa.env13$vectors$pvals) # add pvalues to dataframe
sig.env.scores_cont13 <- subset(env.scores_cont13, pval<=0.05) #subset data to show variables significant at 0.05
# The species scores of the 0.1% most abundant and 100% of the best fitting OTUs
ordis13 <- ordiselect(OTU, species.scores13, ablim = 0.001, fitlim = 1, choices = c(1,3), method = "axes", env = pcoa.env13)
## [1] "21 species selected (0.1% of total number of species)."
## [1] "All species selected which belong to the 0.1% most abundant species."
ordis13.species.scores <- species.scores13[ordis13, ]
ordis13.species.scores <- cbind(ordis13.species.scores, Species = rownames(ordis13.species.scores)) #add species names to dataframe
# change into sp names
try <- merge(data.frame(ordis13.species.scores), data.frame(OTU.sp), 
             by = 0,  all = FALSE)
#add row names
samp2 <- try[,-1]
rownames(samp2) <- try[,1]
try <- samp2
rm(samp2)
# remove the "Species"
try <- try[ , -c(2, 4)]
# remove "try"
ordis13.species.scores <- try
rm(try)

head(ordis13.species.scores)
##                          V1                   V3                        species
## OTU12776  0.126946387694976 -0.00534028987033588             Clonostachys_rosea
## OTU139    0.200755588616414  -0.0370071455091096     Entomortierella_parvispora
## OTU13985 -0.237582025036352  -0.0956487846132982            Saitozyma_podzolica
## OTU19296 -0.295916773006835    0.120579023660146      Cladorrhinum_unclassified
## OTU20886  -0.25280082681134    0.026574191820344 Paraphaeosphaeria_unclassified
## OTU23599  -0.22805912031261   -0.175344391490801       Paraphaeosphaeria_viciae
##               OTU
## OTU12776 OTU12776
## OTU139     OTU139
## OTU13985 OTU13985
## OTU19296 OTU19296
## OTU20886 OTU20886
## OTU23599 OTU23599
ordis13.species.scores$V1 <- as.numeric(ordis13.species.scores$V1)
ordis13.species.scores$V3 <- as.numeric(ordis13.species.scores$V3)
#First lets run the PCoA in with phyloseq to get the axis percentages

GP.ord <- ordinate(ps_RA, "PCoA", "bray", k = 3)
p2 = plot_ordination(ps_RA, GP.ord, type="samples", axes = c(1, 3), color="sample_type", shape="depth")
p2

Remember to change the axis percentages accordingly!!

pcoa.plot <- ggplot() + geom_point(data=site.scrs, aes(Dim1, Dim3, colour = factor(site.scrs$soil_type), shape = factor(site.scrs$depth)), size = 6, alpha = 0.6, stroke = 1.5) + theme_cowplot() + theme(panel.background = element_rect(fill = NA, colour = "black", size = 1, linetype = "solid")) + labs(colour = "", shape = "") + theme(legend.position = "right", legend.text = element_text(size = 12), axis.text = element_text(size = 16)) + scale_colour_manual(values = MyPalette) + labs(y = "PC3 (5.8%)", x = "PC1 (20.0%)")

pcoa.plot

# modify the species names
ordis13.species.scores$species <- gsub("_unclassified","",as.character(ordis13.species.scores$species))
pcoa.plot_OTU <- pcoa.plot + ggrepel::geom_text_repel(data = ordis13.species.scores, aes(x=V1,y=V3,label=species), alpha=0.7, cex = 3.5, direction = "both", segment.size = 0.2, max.overlaps = Inf) + theme(legend.text = element_text(size = 12)) + theme(legend.position = c(0.85, 0.8), legend.text = element_text(size = 12))# + theme(legend.position="none")# if problems, this might help
pcoa.plot_OTU

I will not add env. variables here. Just a simple PCoA

pcoa.plot_OTU +
  geom_segment(aes(x = 0, y = 0, xend = Dim1, yend = Dim3), data = sig.env.scores_cont13, size =1, alpha = 0.5, colour = "grey30") + ggrepel::geom_text_repel(data = sig.env.scores_cont13, aes(x = Dim1, y = Dim3), colour = "blue", fontface = "bold", label = sig.env.scores_cont13$env.variables, max.overlaps=Inf, direction = "y", segment.size = 0.2, box.padding = unit(0.5, "lines"), point.padding = (1), force = 1, max.time = 30, nudge_y = 0.01, nudge_x = 0.01) + theme(legend.position = c(0.91, 0.8), legend.text = element_text(size = 12))  #+ theme(legend.position = "none")


RESULTS STEP 3: PERMANOVA analysis at OTU level

Here I do permutational analysis of variance or PERMANOVA. With PERMANOVA, I want to check how much the main treatment factors, management type (here sample_type) and soil layer (depth), are responsible for differences in fungal communities. In addition, I will check how soil layers differ within management type (4.5) and in which soil layers we see a management type effect (4.6)

1. load packages and data

library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library("pairwiseAdonis")


setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
# Pick relative abundances (compositional) and sample metadata 
ps_RA <- microbiome::transform(ps, "compositional")
otu <- abundances(ps_RA)
meta <- meta(ps_RA)

2. Calculate Bray-Curtis (dis)similarities

ps_RA_bray <- phyloseq::distance(ps_RA, method = "bray")

3. check homogenity of variances

Check that variance homogeneity assumptions hold (to ensure the reliability of the results). If groups have signif. different spreads the permanova result may be potentially explained by that, rtaher than the groups.

Betadisper first calculates the average distance of group members to the group centroid in multivariate space (generated by a distance matrix). Then, an ANOVA is done to test if the dispersions (variances) of groups are different.

3.1 for management type

anova(betadisper(ps_RA_bray, meta$sample_type))
## Analysis of Variance Table
## 
## Response: Distances
##            Df  Sum Sq  Mean Sq F value Pr(>F)
## Groups      3 0.05437 0.018125   1.247 0.2953
## Residuals 136 1.97679 0.014535

We see that the ANOVA p-value is not significant meaning that the homogeneity of variance assumption is met

3.2 for depth

anova(betadisper(ps_RA_bray, meta$depth))
## Analysis of Variance Table
## 
## Response: Distances
##            Df  Sum Sq  Mean Sq F value    Pr(>F)    
## Groups      4 0.41023 0.102556   9.276 1.184e-06 ***
## Residuals 135 1.49258 0.011056                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

We see that the ANOVA p-value is highly significant meaning that homogeneity of variance assumption is NOT met

3.2.1 post hoc

I’ll do post hoc analysis with Tukey’s test to see which groups differ in relation to their variances

TukeyHSD(betadisper(ps_RA_bray, meta$depth))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = distances ~ group, data = df)
## 
## $group
##                         diff          lwr        upr     p adj
## 10...20-0...10  -0.028389112 -0.106091062 0.04931284 0.8502993
## 20...30-0...10   0.041806442 -0.035895508 0.11950839 0.5722606
## 30...40-0...10   0.051717780 -0.025984170 0.12941973 0.3549246
## 40...-0...10     0.130756030  0.053054080 0.20845798 0.0000749
## 20...30-10...20  0.070195554 -0.007506396 0.14789750 0.0971647
## 30...40-10...20  0.080106892  0.002404942 0.15780884 0.0397902
## 40...-10...20    0.159145142  0.081443192 0.23684709 0.0000009
## 30...40-20...30  0.009911338 -0.067790612 0.08761329 0.9966478
## 40...-20...30    0.088949588  0.011247638 0.16665154 0.0161603
## 40...-30...40    0.079038250  0.001336300 0.15674020 0.0440806

Dispersions differ significantly between 40… and all other, and between 30…40 and 10…20

The latter is not a problem at all, because I am not interested of comparing layers if they are not consecutive, but I will keep in mind, that the consecutive layers 30-40 cmd and 40-80 cm do not have similar dispersions.

4. PERMANOVA

First, I will do PERMANOVA so that I include all management types (later without forest)

4.1 check if depth or management have larger effect

# first with just soil type and strata option
adonis2(formula = ps_RA_bray~ sample_type, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$depth)
## Permutation test for adonis under reduced model
## Marginal effects of terms
## Blocks:  strata 
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ sample_type, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$depth)
##              Df SumOfSqs      R2      F Pr(>F)    
## sample_type   3    5.591 0.10399 5.2615  1e-04 ***
## Residual    136   48.175 0.89601                  
## Total       139   53.766 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# then with just depth and strata option
adonis2(formula = ps_RA_bray~ depth, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$sample_type)
## Permutation test for adonis under reduced model
## Marginal effects of terms
## Blocks:  strata 
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ depth, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$sample_type)
##           Df SumOfSqs     R2      F Pr(>F)    
## depth      4    9.705 0.1805 7.4335  1e-04 ***
## Residual 135   44.061 0.8195                  
## Total    139   53.766 1.0000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Depth has a larger effect. So, let’s put it first in the model

4.2 PERMANOVA full model

For the full model it matters which “by” option we choose. When by=“terms” significance for each term is calculated sequentially from first to last, so that the order of terms matter. We will use this, because with sequential analysis we will get R2 values that sum up to 1, and will also get the significance and R2 values for each interaction term separately rather than for the interaction alone.

final <- adonis2(formula = ps_RA_bray ~ depth * sample_type, data = meta, permutations = 9999, method = "bray", by = "terms")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ depth * sample_type, data = meta, permutations = 9999, method = "bray", by = "terms")
##                    Df SumOfSqs      R2      F Pr(>F)    
## depth               4    9.705 0.18050 9.1871  1e-04 ***
## sample_type         3    5.591 0.10399 7.0576  1e-04 ***
## depth:sample_type  12    6.780 0.12611 2.1396  1e-04 ***
## Residual          120   31.690 0.58940                  
## Total             139   53.766 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.2.1 export the result

write.csv2(final, "\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS\\permanova_soiltype_and_depth.csv")

4.2.2 PERMANOVA full model without forest

I will not use this, rather the one above with forest

# subset samples
ps_RA <- microbiome::transform(ps, "compositional")
ps_x <- subset_samples(ps_RA, sample_type != "forest")
meta_subset <- meta(ps_x)


# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_x),
              MARGIN = ifelse(taxa_are_rows(ps_x), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_x = prune_taxa((prev0 > 0), ps_x)
ps_x
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 19820 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 19820 taxa by 7 taxonomic ranks ]
b <- phyloseq::distance(ps_x, method = "bray")


set.seed(777)

final <- adonis2(formula = b ~ depth*sample_type, data = meta_subset, permutations = 9999, method = "bray", by = "terms")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = b ~ depth * sample_type, data = meta_subset, permutations = 9999, method = "bray", by = "terms")
##                    Df SumOfSqs      R2      F Pr(>F)    
## depth               4    9.800 0.21235 9.7433  1e-04 ***
## sample_type         2    3.927 0.08509 7.8083  1e-04 ***
## depth:sample_type   8    4.763 0.10320 2.3676  1e-04 ***
## Residual          110   27.660 0.59936                  
## Total             124   46.149 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.3 Pairwise PERMANOVA: management type

Pairwise PERMANOVA I will only do so that forest is excluded due to too few replicates for forest. But I will not use these result, this is just to check

set.seed(777)
pair.mod<-pairwise.adonis(b, factors=meta_subset$sample_type)
pair.mod
##                     pairs Df SumsOfSqs  F.Model         R2 p.value p.adjusted
## 1  conventional vs meadow  1  2.351620 6.722317 0.07492358   0.001      0.003
## 2 conventional vs organic  1  1.000537 2.947677 0.03429618   0.010      0.030
## 3       meadow vs organic  1  2.574066 7.371501 0.08634616   0.001      0.003
##   sig
## 1   *
## 2   .
## 3   *

4.5 Depth effect in each management type

I will do pairwise permanova analysis of depth for all management types separately, except for forest which has too few replicates

4.5.1 Meadow

# subset samples
x <- "meadow"
ps_RA_subset <- subset_samples(ps_RA, sample_type == x)
meta_subset <- subset(meta, sample_type == x)

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
              MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 11032 taxa and 40 samples ]
## sample_data() Sample Data:       [ 40 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 11032 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")

set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$depth)
pair.mod
##                 pairs Df SumsOfSqs  F.Model        R2 p.value p.adjusted sig
## 1   0...10 vs 10...20  1 0.6686591 4.418415 0.2398912   0.001       0.01   *
## 2   0...10 vs 20...30  1 1.2787641 6.259368 0.3089617   0.001       0.01   *
## 3   0...10 vs 30...40  1 2.1571845 9.795664 0.4116575   0.001       0.01   *
## 4     0...10 vs 40...  1 1.6755335 5.447545 0.2801148   0.002       0.02   .
## 5  10...20 vs 20...30  1 0.3728923 1.802396 0.1140584   0.048       0.48    
## 6  10...20 vs 30...40  1 1.6735250 7.511019 0.3491708   0.001       0.01   *
## 7    10...20 vs 40...  1 1.5401662 4.965605 0.2618216   0.002       0.02   .
## 8  20...30 vs 30...40  1 0.9160107 3.321638 0.1917624   0.001       0.01   *
## 9    20...30 vs 40...  1 1.0302087 2.837037 0.1684998   0.001       0.01   *
## 10   30...40 vs 40...  1 0.7004325 1.847860 0.1166000   0.012       0.12

4.5.2 Organic

# subset samples
x <- "organic"
ps_RA_subset <- subset_samples(ps_RA, sample_type == x)
meta_subset <- subset(meta, sample_type == x)

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
              MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 14151 taxa and 40 samples ]
## sample_data() Sample Data:       [ 40 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 14151 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")

set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$depth)
pair.mod
##                 pairs Df SumsOfSqs   F.Model         R2 p.value p.adjusted sig
## 1   0...10 vs 10...20  1 0.1366228  0.906114 0.06078808   0.443       1.00    
## 2   0...10 vs 20...30  1 0.4849847  2.278740 0.13998260   0.007       0.07    
## 3   0...10 vs 30...40  1 2.0284059  8.323851 0.37286806   0.001       0.01   *
## 4     0...10 vs 40...  1 2.3642953 11.171511 0.44381567   0.002       0.02   .
## 5  10...20 vs 20...30  1 0.4214687  2.119591 0.13149163   0.016       0.16    
## 6  10...20 vs 30...40  1 2.1185424  9.223078 0.39715141   0.001       0.01   *
## 7    10...20 vs 40...  1 2.4397574 12.343810 0.46856585   0.002       0.02   .
## 8  20...30 vs 30...40  1 1.0503864  3.600278 0.20455802   0.001       0.01   *
## 9    20...30 vs 40...  1 1.4017114  5.397392 0.27825350   0.001       0.01   *
## 10   30...40 vs 40...  1 0.5095377  1.753656 0.11131738   0.041       0.41

4.5.2 Conventional

# subset samples
x <- "conventional"
ps_RA_subset <- subset_samples(ps_RA, sample_type == x)
meta_subset <- subset(meta, sample_type == x)

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
              MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 13863 taxa and 45 samples ]
## sample_data() Sample Data:       [ 45 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 13863 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")

set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$depth)
pair.mod
##                 pairs Df SumsOfSqs    F.Model         R2 p.value p.adjusted sig
## 1   0...10 vs 10...20  1 0.1170223  0.8087735 0.04811615   0.757       1.00    
## 2   0...10 vs 20...30  1 0.7762987  3.5720248 0.18250665   0.003       0.03   .
## 3   0...10 vs 30...40  1 2.1200130  8.7764519 0.35422553   0.001       0.01   *
## 4     0...10 vs 40...  1 1.6494331  5.3721690 0.25136283   0.001       0.01   *
## 5  10...20 vs 20...30  1 0.7642509  4.0496264 0.20198014   0.007       0.07    
## 6  10...20 vs 30...40  1 2.2368859 10.5042323 0.39632283   0.001       0.01   *
## 7    10...20 vs 40...  1 1.8451677  6.6271158 0.29288381   0.001       0.01   *
## 8  20...30 vs 30...40  1 0.6735640  2.3585234 0.12847021   0.025       0.25    
## 9    20...30 vs 40...  1 0.7552291  2.1512623 0.11851861   0.013       0.13    
## 10   30...40 vs 40...  1 0.4996687  1.3314104 0.07682066   0.105       1.00

4.6 Management effect at different depths?

I will analyse these without forest as forest has too few replicates

4.6.1 1st layer

# define soil layer to be analysed
x <- "0...10"

# subset samples
ps_RA_subset <- subset_samples(ps_RA, sample_type != "forest")

ps_RA_subset <- subset_samples(ps_RA_subset, depth == x)
meta_subset <- subset(meta, depth == x & sample_type != "forest")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
              MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 13638 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 13638 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")


set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$sample_type)
pair.mod
##                     pairs Df SumsOfSqs  F.Model        R2 p.value p.adjusted
## 1  conventional vs meadow  1 1.5452073 9.547816 0.3889477   0.001      0.003
## 2 conventional vs organic  1 0.7360756 4.347365 0.2247006   0.001      0.003
## 3       meadow vs organic  1 1.5542597 9.915270 0.4146000   0.002      0.006
##   sig
## 1   *
## 2   *
## 3   *

4.6.2 2nd layer

# subset samples
x <- "10...20"


# subset samples
ps_RA_subset <- subset_samples(ps_RA, sample_type != "forest")

ps_RA_subset <- subset_samples(ps_RA_subset, depth == x)
meta_subset <- subset(meta, depth == x & sample_type != "forest")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
              MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 15128 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 15128 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")



set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$sample_type)
pair.mod
##                     pairs Df SumsOfSqs  F.Model        R2 p.value p.adjusted
## 1  conventional vs meadow  1 1.3202292 9.871317 0.3968956   0.001      0.003
## 2 conventional vs organic  1 0.6464922 5.141142 0.2552557   0.001      0.003
## 3       meadow vs organic  1 1.3555168 9.325285 0.3997930   0.002      0.006
##   sig
## 1   *
## 2   *
## 3   *

4.6.3 3rd layer

# subset samples
x <- "20...30"

# subset samples
ps_RA_subset <- subset_samples(ps_RA, sample_type != "forest")

ps_RA_subset <- subset_samples(ps_RA_subset, depth == x)
meta_subset <- subset(meta, depth == x & sample_type != "forest")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
              MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 14304 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 14304 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")



set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$sample_type)
pair.mod
##                     pairs Df SumsOfSqs  F.Model        R2 p.value p.adjusted
## 1  conventional vs meadow  1 0.8814463 3.381681 0.1839702   0.001      0.003
## 2 conventional vs organic  1 0.4094517 1.567927 0.0946363   0.114      0.342
## 3       meadow vs organic  1 0.8348207 3.206261 0.1863427   0.002      0.006
##   sig
## 1   *
## 2    
## 3   *

4.6.4 4th layer

# subset samples
x <- "30...40"

# subset samples
ps_RA_subset <- subset_samples(ps_RA, sample_type != "forest")

ps_RA_subset <- subset_samples(ps_RA_subset, depth == x)
meta_subset <- subset(meta, depth == x & sample_type != "forest")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
              MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 5256 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 5256 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")



set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$sample_type)
pair.mod
##                     pairs Df SumsOfSqs   F.Model         R2 p.value p.adjusted
## 1  conventional vs meadow  1 0.7196829 2.3881246 0.13734228   0.006      0.018
## 2 conventional vs organic  1 0.2610497 0.8266673 0.05223256   0.707      1.000
## 3       meadow vs organic  1 0.5459411 1.7774404 0.11265708   0.012      0.036
##   sig
## 1   .
## 2    
## 3   .

4.6.5 5th layer

# subset samples
x <- "40..."

# subset samples
ps_RA_subset <- subset_samples(ps_RA, sample_type != "forest")

ps_RA_subset <- subset_samples(ps_RA_subset, depth == x)
meta_subset <- subset(meta, depth == x & sample_type != "forest")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
              MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 2487 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 2487 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")


set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$sample_type)
pair.mod
##                     pairs Df SumsOfSqs  F.Model         R2 p.value p.adjusted
## 1  conventional vs meadow  1 0.4663823 1.030147 0.06426309   0.353      1.000
## 2 conventional vs organic  1 0.7337456 2.062743 0.12089163   0.011      0.033
## 3       meadow vs organic  1 1.0846039 2.992358 0.17610025   0.002      0.006
##   sig
## 1    
## 2   .
## 3   *

RESULTS STEP 4: PERMANOVA analysis with soil properties

PERMANOVA with soil properties will be done with only meadow, organic and conventional management types excluding forest

1. load packages and data

library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library("pairwiseAdonis")

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
# Pick relative abundances (compositional) and sample metadata 
ps_RA <- microbiome::transform(ps, "compositional")
ps_RA_nf <- subset_samples(ps_RA, sample_type!="forest")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_nf),
              MARGIN = ifelse(taxa_are_rows(ps_RA_nf), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_RA_nf = prune_taxa((prev0 > 0), ps_RA_nf)
ps_RA_nf
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 19820 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 19820 taxa by 7 taxonomic ranks ]
otu <- abundances(ps_RA_nf)
meta <- meta(ps_RA_nf)

Note: adonis cannot handle or account for NA or blanks in your data. Use na.omit(meta) %>% before each run where such variables are used where NAs exist

2. Calculate Bray-Curtis (dis)similarities

ps_RA_bray <- phyloseq::distance(ps_RA_nf, method = "bray")

4.2 PERMANOVA with soil properties

I will use the following soil properties

“log_root”
“pH_H2O”
“C_g_per_kg”
“N_gkg”
“TP_gkg”
“Alox_mmolkg”
“Feox_mmolkg”
“PH2O_mgkg”
“Porg_mgkg”
“DOC_mgkg”
“Pinorg_mgkg”
“C_per_N”

final <- adonis2(formula = ps_RA_bray ~ log_root, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ log_root, data = meta, permutations = 9999, method = "bray")
##           Df SumOfSqs      R2      F Pr(>F)    
## log_root   1    5.369 0.11634 16.194  1e-04 ***
## Residual 123   40.780 0.88366                  
## Total    124   46.149 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ pH_H2O, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ pH_H2O, data = meta, permutations = 9999, method = "bray")
##           Df SumOfSqs     R2      F Pr(>F)    
## pH_H2O     1    6.041 0.1309 18.526  1e-04 ***
## Residual 123   40.108 0.8691                  
## Total    124   46.149 1.0000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ C_g_per_kg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ C_g_per_kg, data = meta, permutations = 9999, method = "bray")
##             Df SumOfSqs      R2    F Pr(>F)    
## C_g_per_kg   1    6.482 0.14046 20.1  1e-04 ***
## Residual   123   39.667 0.85954                
## Total      124   46.149 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ N_gkg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ N_gkg, data = meta, permutations = 9999, method = "bray")
##           Df SumOfSqs      R2      F Pr(>F)    
## N_gkg      1    6.647 0.14404 20.698  1e-04 ***
## Residual 123   39.502 0.85596                  
## Total    124   46.149 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ TP_gkg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ TP_gkg, data = meta, permutations = 9999, method = "bray")
##           Df SumOfSqs      R2      F Pr(>F)    
## TP_gkg     1    5.559 0.12046 16.846  1e-04 ***
## Residual 123   40.590 0.87954                  
## Total    124   46.149 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ Alox_mmolkg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ Alox_mmolkg, data = meta, permutations = 9999, method = "bray")
##              Df SumOfSqs      R2      F Pr(>F)    
## Alox_mmolkg   1    3.713 0.08045 10.762  1e-04 ***
## Residual    123   42.437 0.91955                  
## Total       124   46.149 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ Feox_mmolkg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ Feox_mmolkg, data = meta, permutations = 9999, method = "bray")
##              Df SumOfSqs      R2      F Pr(>F)    
## Feox_mmolkg   1    4.853 0.10517 14.456  1e-04 ***
## Residual    123   41.296 0.89483                  
## Total       124   46.149 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ DOC_mgkg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ DOC_mgkg, data = meta, permutations = 9999, method = "bray")
##           Df SumOfSqs      R2      F Pr(>F)    
## DOC_mgkg   1    5.300 0.11484 15.957  1e-04 ***
## Residual 123   40.850 0.88516                  
## Total    124   46.149 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ Pinorg_mgkg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ Pinorg_mgkg, data = meta, permutations = 9999, method = "bray")
##              Df SumOfSqs      R2      F Pr(>F)  
## Pinorg_mgkg   1    0.699 0.01515 1.8918 0.0305 *
## Residual    123   45.450 0.98485                
## Total       124   46.149 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ C_per_N, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ C_per_N, data = meta, permutations = 9999, method = "bray")
##           Df SumOfSqs      R2      F Pr(>F)    
## C_per_N    1    4.909 0.10637 14.641  1e-04 ***
## Residual 123   41.240 0.89363                  
## Total    124   46.149 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

if missing values

colSums(is.na(meta))
##            sampleID                plot   sampling_position actual_sample_depth 
##                   0                   0                   0                   0 
##               depth     depth_numerical          vegetation         sample_type 
##                   0                   0                   0                   0 
##            root_mgg              pH_H2O             EC_uScm          C_g_per_kg 
##                   0                   0                   0                   0 
##               N_gkg              TP_gkg         Alox_mmolkg         Feox_mmolkg 
##                   0                   0                   0                   0 
##       oxides_mmolkg           PH2O_mgkg           Porg_mgkg            DOC_mgkg 
##                   0                   1                   1                   0 
##         Pinorg_mgkg             C_per_N            observed               chao1 
##                   0                   0                   0                   0 
##             shannon        observed_sng           chao1_sng         shannon_sng 
##                   0                   0                   0                   0 
##            log_root 
##                   0

These are NA:

Porg_mgkg for sample NG2A2_30to40

PH2O_mgkg for sample NG2B3_0to10

m <- meta %>% drop_na(Porg_mgkg)

x <- subset_samples(ps_RA_nf, sampleID!="NG2A2_30to40")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(x),
              MARGIN = ifelse(taxa_are_rows(x), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
x = prune_taxa((prev0 > 0), x)
x
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 19817 taxa and 124 samples ]
## sample_data() Sample Data:       [ 124 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 19817 taxa by 7 taxonomic ranks ]
otu <- abundances(x)

b <- phyloseq::distance(x, method = "bray")

final <- adonis2(formula = b ~ Porg_mgkg, data = m, permutations = 9999, method = "bray", by = "terms")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = b ~ Porg_mgkg, data = m, permutations = 9999, method = "bray", by = "terms")
##            Df SumOfSqs      R2      F Pr(>F)    
## Porg_mgkg   1    6.645 0.14528 20.736  1e-04 ***
## Residual  122   39.095 0.85472                  
## Total     123   45.739 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m <- meta %>% drop_na(PH2O_mgkg)

x <- subset_samples(ps_RA_nf, sampleID!="NG2B3_0to10")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(x),
              MARGIN = ifelse(taxa_are_rows(x), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
x = prune_taxa((prev0 > 0), x)
x
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 19819 taxa and 124 samples ]
## sample_data() Sample Data:       [ 124 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 19819 taxa by 7 taxonomic ranks ]
otu <- abundances(x)

b <- phyloseq::distance(x, method = "bray")

final <- adonis2(formula = b ~ PH2O_mgkg, data = m, permutations = 9999, method = "bray", by = "terms")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = b ~ PH2O_mgkg, data = m, permutations = 9999, method = "bray", by = "terms")
##            Df SumOfSqs      R2      F Pr(>F)    
## PH2O_mgkg   1    1.935 0.04229 5.3866  1e-04 ***
## Residual  122   43.824 0.95771                  
## Total     123   45.759 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

PERMANOVA with all soil properties in one model

remove these samples

Porg_mgkg (NG2A2_30to40) PH2O_mgkg (NG2B3_0to10)

m <- meta %>% drop_na(PH2O_mgkg)
m <- m %>% drop_na(Porg_mgkg)

ps_x <- subset_samples(ps_RA_nf, sampleID!="NG2B3_0to10")
ps_x <- subset_samples(ps_x, sampleID!="NG2A2_30to40")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_x),
              MARGIN = ifelse(taxa_are_rows(ps_x), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_x = prune_taxa((prev0 > 0), ps_x)
ps_x
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 19816 taxa and 123 samples ]
## sample_data() Sample Data:       [ 123 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 19816 taxa by 7 taxonomic ranks ]
b <- phyloseq::distance(ps_x, method = "bray")

final <- adonis2(formula = b ~ log_root + pH_H2O + C_g_per_kg + N_gkg + TP_gkg + Alox_mmolkg + Feox_mmolkg + PH2O_mgkg + Porg_mgkg + DOC_mgkg + Pinorg_mgkg + C_per_N, data = m, permutations = 9999, method = "bray", by = NULL)

final
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = b ~ log_root + pH_H2O + C_g_per_kg + N_gkg + TP_gkg + Alox_mmolkg + Feox_mmolkg + PH2O_mgkg + Porg_mgkg + DOC_mgkg + Pinorg_mgkg + C_per_N, data = m, permutations = 9999, method = "bray", by = NULL)
##           Df SumOfSqs      R2      F Pr(>F)    
## Model     12   15.333 0.33811 4.6826  1e-04 ***
## Residual 110   30.016 0.66189                  
## Total    122   45.350 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

PERMANOVA for each soil layer separatley

run for all layers and all of these separately:

# Define the environmental variables as a character vector, not as a factor
env <- c("log_root", "pH_H2O", "C_g_per_kg", "N_gkg", "TP_gkg", "Alox_mmolkg", "Feox_mmolkg", 
         "PH2O_mgkg", "Porg_mgkg", "DOC_mgkg", "Pinorg_mgkg", "C_per_N")

# Convert the 'depth' column to a factor
meta$depth <- as.factor(meta$depth)

# Initialize an empty list to store the results
adonis_results <- list()

# Loop over each depth and environmental variable
for (i in levels(meta$depth)) {
  for (j in env) {
    # Subset samples
    ps_x <- subset_samples(ps_RA, sample_type != "forest")
    ps_x <- subset_samples(ps_x, sampleID != "NG2B3_0to10")
    ps_x <- subset_samples(ps_x, sampleID != "NG2A2_30to40")
    ps_x <- subset_samples(ps_x, depth == i)
    meta_subset <- meta(ps_x)

    # Define prevalence of each taxa (in how many samples did each taxa appear at least once)
    prev0 <- apply(X = otu_table(ps_x),
                   MARGIN = ifelse(taxa_are_rows(ps_x), yes = 1, no = 2),
                   FUN = function(x) { sum(x > 0) })

    # Execute prevalence filter, using `prune_taxa()` function
    ps_x <- prune_taxa((prev0 > 0), ps_x)

    # Calculate Bray-Curtis distance
    b <- phyloseq::distance(ps_x, method = "bray")

    # Run adonis2 analysis
    formula <- as.formula(paste("b ~", j))
    adonis_result <- adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
    
    # Store the result in the list with a descriptive name
    result_name <- paste("depth", i, "env", j, sep = "_")
    adonis_results[[result_name]] <- adonis_result
  }
}

# View the list of results
adonis_results
## $depth_0...10_env_log_root
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1   0.7966 0.13653 3.4787 0.0014 **
## Residual 22   5.0376 0.86347                 
## Total    23   5.8342 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_0...10_env_pH_H2O
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs     R2      F Pr(>F)   
## Model     1   0.6628 0.1136 2.8195 0.0054 **
## Residual 22   5.1714 0.8864                 
## Total    23   5.8342 1.0000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_0...10_env_C_g_per_kg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   1.5213 0.26075 7.7599  1e-04 ***
## Residual 22   4.3129 0.73925                  
## Total    23   5.8342 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_0...10_env_N_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   1.4712 0.25216 7.4181  1e-04 ***
## Residual 22   4.3630 0.74784                  
## Total    23   5.8342 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_0...10_env_TP_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.2863 0.04908 1.1354 0.2882
## Residual 22   5.5479 0.95092              
## Total    23   5.8342 1.00000              
## 
## $depth_0...10_env_Alox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   1.0063 0.17248 4.5855  1e-04 ***
## Residual 22   4.8279 0.82752                  
## Total    23   5.8342 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_0...10_env_Feox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1   0.7390 0.12666 3.1908 0.0025 **
## Residual 22   5.0952 0.87334                 
## Total    23   5.8342 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_0...10_env_PH2O_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     1   0.5390 0.09239 2.2396 0.0204 *
## Residual 22   5.2951 0.90761                
## Total    23   5.8342 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_0...10_env_Porg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   0.7905 0.13549 3.4479  9e-04 ***
## Residual 22   5.0437 0.86451                  
## Total    23   5.8342 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_0...10_env_DOC_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   1.4018 0.24027 6.9575  1e-04 ***
## Residual 22   4.4324 0.75973                  
## Total    23   5.8342 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_0...10_env_Pinorg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.3372 0.05779 1.3495 0.1689
## Residual 22   5.4970 0.94221              
## Total    23   5.8342 1.00000              
## 
## $depth_0...10_env_C_per_N
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   1.1766 0.20168 5.5579  1e-04 ***
## Residual 22   4.6575 0.79832                  
## Total    23   5.8342 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_10...20_env_log_root
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1   0.7280 0.14084 3.7704 0.0013 **
## Residual 23   4.4407 0.85916                 
## Total    24   5.1686 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_10...20_env_pH_H2O
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   0.9411 0.18209 5.1204  2e-04 ***
## Residual 23   4.2275 0.81791                  
## Total    24   5.1686 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_10...20_env_C_g_per_kg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   1.2399 0.23989 7.2586  1e-04 ***
## Residual 23   3.9287 0.76011                  
## Total    24   5.1686 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_10...20_env_N_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   0.9946 0.19242 5.4802  1e-04 ***
## Residual 23   4.1741 0.80758                  
## Total    24   5.1686 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_10...20_env_TP_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.2530 0.04895 1.1838  0.243
## Residual 23   4.9156 0.95105              
## Total    24   5.1686 1.00000              
## 
## $depth_10...20_env_Alox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   1.2957 0.25069 7.6951  1e-04 ***
## Residual 23   3.8729 0.74931                  
## Total    24   5.1686 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_10...20_env_Feox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   1.1620 0.22483 6.6708  1e-04 ***
## Residual 23   4.0066 0.77517                  
## Total    24   5.1686 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_10...20_env_PH2O_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     1   0.3596 0.06957 1.7196 0.0688 .
## Residual 23   4.8091 0.93043                
## Total    24   5.1686 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_10...20_env_Porg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.2818 0.05453 1.3265 0.1728
## Residual 23   4.8868 0.94547              
## Total    24   5.1686 1.00000              
## 
## $depth_10...20_env_DOC_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   1.1131 0.21535 6.3126  1e-04 ***
## Residual 23   4.0555 0.78465                  
## Total    24   5.1686 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_10...20_env_Pinorg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     1   0.3936 0.07615 1.8959   0.05 *
## Residual 23   4.7750 0.92385                
## Total    24   5.1686 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_10...20_env_C_per_N
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1   0.5862 0.11342 2.9425  0.005 **
## Residual 23   4.5824 0.88658                 
## Total    24   5.1686 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_20...30_env_log_root
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1   0.7395 0.10346 2.6541 0.0029 **
## Residual 23   6.4086 0.89654                 
## Total    24   7.1482 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_20...30_env_pH_H2O
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   0.8450 0.11821 3.0833  2e-04 ***
## Residual 23   6.3032 0.88179                  
## Total    24   7.1482 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_20...30_env_C_g_per_kg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1   0.7142 0.09991 2.5531 0.0049 **
## Residual 23   6.4340 0.90009                 
## Total    24   7.1482 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_20...30_env_N_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1   0.6962 0.09739 2.4816 0.0067 **
## Residual 23   6.4520 0.90261                 
## Total    24   7.1482 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_20...30_env_TP_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     1   0.4651 0.06506 1.6005 0.0711 .
## Residual 23   6.6831 0.93494                
## Total    24   7.1482 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_20...30_env_Alox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1   0.6839 0.09567 2.4332 0.0059 **
## Residual 23   6.4643 0.90433                 
## Total    24   7.1482 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_20...30_env_Feox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1   0.7618 0.10658 2.7437 0.0021 **
## Residual 23   6.3863 0.89342                 
## Total    24   7.1482 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_20...30_env_PH2O_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     1   0.4951 0.06927 1.7117 0.0493 *
## Residual 23   6.6530 0.93073                
## Total    24   7.1482 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_20...30_env_Porg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     1   0.6443 0.09014 2.2786 0.0105 *
## Residual 23   6.5038 0.90986                
## Total    24   7.1482 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_20...30_env_DOC_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1   0.6601 0.09235 2.3401 0.0085 **
## Residual 23   6.4881 0.90765                 
## Total    24   7.1482 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_20...30_env_Pinorg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.3030 0.04239 1.0182 0.4027
## Residual 23   6.8451 0.95761              
## Total    24   7.1482 1.00000              
## 
## $depth_20...30_env_C_per_N
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     1   0.4538 0.06348 1.5591 0.0878 .
## Residual 23   6.6944 0.93652                
## Total    24   7.1482 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_30...40_env_log_root
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     1   0.5268 0.07033 1.6643 0.0328 *
## Residual 22   6.9641 0.92967                
## Total    23   7.4909 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_30...40_env_pH_H2O
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2     F Pr(>F)  
## Model     1   0.4728 0.06311 1.482 0.0711 .
## Residual 22   7.0181 0.93689               
## Total    23   7.4909 1.00000               
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_30...40_env_C_g_per_kg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2    F Pr(>F)
## Model     1   0.2850 0.03804 0.87 0.5167
## Residual 22   7.2060 0.96196            
## Total    23   7.4909 1.00000            
## 
## $depth_30...40_env_N_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.2756 0.03678 0.8402 0.6539
## Residual 22   7.2154 0.96322              
## Total    23   7.4909 1.00000              
## 
## $depth_30...40_env_TP_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.4086 0.05455 1.2693 0.1997
## Residual 22   7.0823 0.94545              
## Total    23   7.4909 1.00000              
## 
## $depth_30...40_env_Alox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     1   0.5886 0.07858 1.8761 0.0193 *
## Residual 22   6.9023 0.92142                
## Total    23   7.4909 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_30...40_env_Feox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     1   0.5944 0.07936 1.8963  0.018 *
## Residual 22   6.8965 0.92064                
## Total    23   7.4909 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_30...40_env_PH2O_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2     F Pr(>F)
## Model     1   0.3054 0.04077 0.935 0.5159
## Residual 22   7.1855 0.95923             
## Total    23   7.4909 1.00000             
## 
## $depth_30...40_env_Porg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2     F Pr(>F)  
## Model     1   0.5900 0.07877 1.881 0.0144 *
## Residual 22   6.9009 0.92123               
## Total    23   7.4909 1.00000               
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_30...40_env_DOC_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.3720 0.04966 1.1495 0.3077
## Residual 22   7.1189 0.95034              
## Total    23   7.4909 1.00000              
## 
## $depth_30...40_env_Pinorg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.2953 0.03942 0.9028 0.5857
## Residual 22   7.1956 0.96058              
## Total    23   7.4909 1.00000              
## 
## $depth_30...40_env_C_per_N
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.4801 0.06409 1.5066 0.1147
## Residual 22   7.0108 0.93591              
## Total    23   7.4909 1.00000              
## 
## $depth_40..._env_log_root
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.4089 0.04044 0.9693 0.4857
## Residual 23   9.7019 0.95956              
## Total    24  10.1108 1.00000              
## 
## $depth_40..._env_pH_H2O
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.5118 0.05062 1.2263 0.1537
## Residual 23   9.5990 0.94938              
## Total    24  10.1108 1.00000              
## 
## $depth_40..._env_C_g_per_kg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.4801 0.04748 1.1465 0.2979
## Residual 23   9.6307 0.95252              
## Total    24  10.1108 1.00000              
## 
## $depth_40..._env_N_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.3803 0.03761 0.8989 0.6571
## Residual 23   9.7305 0.96239              
## Total    24  10.1108 1.00000              
## 
## $depth_40..._env_TP_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.3102 0.03068 0.7281 0.9339
## Residual 23   9.8005 0.96932              
## Total    24  10.1108 1.00000              
## 
## $depth_40..._env_Alox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.3621 0.03582 0.8544 0.7201
## Residual 23   9.7487 0.96418              
## Total    24  10.1108 1.00000              
## 
## $depth_40..._env_Feox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1   0.8108 0.08019 2.0052 0.0017 **
## Residual 23   9.3000 0.91981                 
## Total    24  10.1108 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_40..._env_PH2O_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.4612 0.04561 1.0992 0.2755
## Residual 23   9.6496 0.95439              
## Total    24  10.1108 1.00000              
## 
## $depth_40..._env_Porg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.3897 0.03854 0.9221 0.5867
## Residual 23   9.7211 0.96146              
## Total    24  10.1108 1.00000              
## 
## $depth_40..._env_DOC_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.3038 0.03005 0.7125 0.8952
## Residual 23   9.8070 0.96995              
## Total    24  10.1108 1.00000              
## 
## $depth_40..._env_Pinorg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.2886 0.02854 0.6757 0.9697
## Residual 23   9.8222 0.97146              
## Total    24  10.1108 1.00000              
## 
## $depth_40..._env_C_per_N
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.5202 0.05145 1.2476 0.1383
## Residual 23   9.5906 0.94855              
## Total    24  10.1108 1.00000

RESULTS STEPS 5 and 6:HEATMAPS with genera and COMPOSITION PLOTS for phyla, class and FUNGuild

A) HEATMAP for meadow, organic and conventional

1. load packages and data

library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")


setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
# Pick relative abundances (compositional) and sample metadata 
ps_RA <- microbiome::transform(ps, "compositional")
ps_RA_nf <- subset_samples(ps_RA, sample_type!="forest")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_nf),
              MARGIN = ifelse(taxa_are_rows(ps_RA_nf), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_RA_nf = prune_taxa((prev0 > 0), ps_RA_nf)
ps_RA_nf
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 19820 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 19820 taxa by 7 taxonomic ranks ]
meta_nf <- meta(ps_RA_nf)

2. Overview of the heatmap analysis

I will do heatmap for fungal genera using only meadow, organic and conventional soils (without forest). I will use RA values For the final heatmap figure the RA values will be standardizes (z-transformed)

  1. First pick the 20 most abundant genera in each soil type depth
  2. Do unconstrained ordination on the picjed taxa (RA). Do with 3 axes and check both axis1 + axis2; and axis1 + axis3.
  3. Based on PCoA, decide how to cluster samples (which soil layers should be “pooled” when doing comparisons, this is done to avoid too many comparisons): top soil, sub soil and deep soil?
  4. Do multiple testing between clusters that make sense (do not test between e.g. meadow deep and organic topsoil)
  5. Make a HEATMAP of the significant taxa only

3. edit unclassified taxa at genus level

# make a dataframe of the tax-table
tax <- as.data.frame(ps_RA@tax_table)
# calculate how many genus
length(unique(tax$genus))
## [1] 943
tax$genus <- sub(".*_.*", "Unclassified_genus",tax$genus)
# calculate how many genus
length(unique(tax$genus))
## [1] 710
# of which 1 is "Unclassified_genus" 

# edit the phyloseq object
ps_genus <- ps
tax <- as.matrix(tax) # convert it into a matrix.
tax <- tax_table(tax) # convert into phyloseq compatible file.
tax_table(ps_genus) <- tax # incroporate into phyloseq Object

ps_genus <- aggregate_rare(ps_genus, level = 'genus', detection = 0/100, prevalence = 0/140)
ps_genus
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 710 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 710 taxa by 2 taxonomic ranks ]
ps_genus_RA <- microbiome::transform(ps_genus, "compositional")

# remove unclassified
allTaxa = taxa_names(ps_genus_RA)
badTaxa = c("Unclassified_genus")
myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
ps_genus_RA_pruned <- prune_taxa(myTaxa, ps_genus_RA)
ps_genus_RA_pruned
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 709 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 709 taxa by 2 taxonomic ranks ]

4. Pick 20 most abund taxa in each soil management layer

# sample wise filtering according to most abund. genera

# Initialize an empty list to store the taxa
abund.taxa <- list()

for (i in meta_nf$sample_type) {
  for (j in meta_nf$depth) {
    x <- sample_names(sample_data(ps_genus_RA_pruned)[sample_data(ps_genus_RA_pruned)$sample_type == i & sample_data(ps_genus_RA_pruned)$depth == j,])

# Calculate taxa mean of the selected samples
top20 <- head(sort(rowMeans(otu_table(ps_genus_RA_pruned)[,x]), decreasing = TRUE), 20)

result_name <- paste("sample_type", i, "depth", j, sep = "_")

abund.taxa[[result_name]] <- top20
  }
}

5. Combine picked taxa

management_layer <- c(names(abund.taxa))

all_top20 <- c()

for (i in management_layer) {
  top20 <- c(names(abund.taxa[[i]]))
  all_top20 <- c(all_top20, top20)

}

all_top20_unique <- unique(all_top20)
length(all_top20_unique)
## [1] 95

6. Make a phyloseq of picked taxa

# first aggregate to genus
ps_RA_nf_genus <- aggregate_rare(ps_RA_nf, level = "genus", detection = 0/140, prevalence = 0/100)

# filter by taxa name
ps_RA_nf_genus_pruned <- prune_taxa(all_top20_unique, ps_RA_nf_genus)
ps_RA_nf_genus_pruned
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 95 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 95 taxa by 1 taxonomic ranks ]
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

ps_genus_nf_HETAMAP <- ps_RA_nf_genus_pruned
save(ps_genus_nf_HETAMAP, file='ps_genus_nf_HETAMAP_all_top20')

7. PCoA

OTU = as(otu_table(ps_RA_nf_genus_pruned), "matrix")
# transpose if necessary
if(taxa_are_rows(ps_RA_nf_genus_pruned)){OTU <- t(OTU)}
# Coerce to data.frame
OTU = as.data.frame(OTU)
OTU <- as.matrix(OTU)

bray_dist <- vegan::vegdist(OTU, method="bray")
str(bray_dist)
##  'dist' Named num [1:7750] 0.29 0.558 0.934 0.43 0.397 ...
##  - attr(*, "maxdist")= num 1
##  - attr(*, "Size")= int 125
##  - attr(*, "Labels")= chr [1:125] "CG9.1_0to10" "CG9.1_10to20" "CG9.1_20to30" "CG9.1_30to40" ...
##  - attr(*, "Diag")= logi FALSE
##  - attr(*, "Upper")= logi FALSE
##  - attr(*, "method")= chr "bray"
##  - attr(*, "call")= language vegan::vegdist(x = OTU, method = "bray")
pcoa <- cmdscale(bray_dist, eig=TRUE, k = 3)
# To plot the output from the mds using ggplot a new datasheet needs to be created which contains the x,y points for each site. You can do this by calling the scores of you mds.
site.scrs <- as.data.frame(scores(pcoa, display = "sites")) #save pcoa results into dataframe
site.scrs <- cbind(site.scrs, management_type = meta_nf$sample_type) #add grouping variable "management_type" to dataframe
site.scrs <- cbind(site.scrs, depth = meta_nf$depth) #add grouping variable of depth

head(site.scrs)
##                     Dim1        Dim2        Dim3 management_type   depth
## CG9.1_0to10  -0.27813982 -0.10346866  0.09148370    conventional  0...10
## CG9.1_10to20 -0.31446403 -0.15468057  0.01103617    conventional 10...20
## CG9.1_20to30 -0.02299791 -0.15075965 -0.18961661    conventional 20...30
## CG9.1_30to40  0.44844111 -0.09800013 -0.09893131    conventional 30...40
## CG9.1_40to70 -0.09249699 -0.11668507 -0.04215126    conventional   40...
## CG9.2_0to10  -0.19817772 -0.09107915  0.22764881    conventional  0...10
MyPalette <- c(forest = "#1167b1", meadow = "#fbc02d", organic = "#8a8a8a", conventional =  "#b71c1c")

7.1. axes 1 and 2

first get axis %

GP.ord <- ordinate(ps_RA_nf_genus_pruned, "PCoA", "bray")
pord = plot_ordination(ps_RA_nf_genus_pruned, GP.ord, type="samples", color="sample_type", shape="depth")
pord

Change axis percentages accordingly!

pcoa.plot12 <- ggplot() + geom_point(data=site.scrs, aes(Dim1, Dim2, colour = factor(site.scrs$management_type), shape = factor(site.scrs$depth)), size = 5, alpha = 0.7) + theme_cowplot() + theme(panel.background = element_rect(fill = NA, colour = "black", size = 1, linetype = "solid")) + labs(colour = "", shape = "") + theme(legend.text = element_text(size = 12), axis.text = element_text(size = 16)) + scale_colour_manual(values = MyPalette)  + labs(y = "PC2 (9.4%)", x = "PC1 (22.7%)")


pcoa.plot12

7.2. axes 1 and 3

first get axis %

GP.ord <- ordinate(ps_RA_nf_genus_pruned, "PCoA", "bray", k = 3)
pord = plot_ordination(ps_RA_nf_genus_pruned, GP.ord, type="samples", axes = c(1, 3), color="sample_type", shape="depth")
pord

Change axis percentages accordingly!

pcoa.plot13 <- ggplot() + geom_point(data=site.scrs, aes(Dim1, Dim3, colour = factor(site.scrs$management_type), shape = factor(site.scrs$depth)), size = 5, alpha = 0.7) + theme_cowplot() + theme(panel.background = element_rect(fill = NA, colour = "black", size = 1, linetype = "solid")) + labs(colour = "", shape = "") + theme(legend.text = element_text(size = 13), axis.text = element_text(size = 16)) + scale_colour_manual(values = MyPalette) + labs(y = "PC3 (8.0%)", x = "PC1 (22.7%)")


pcoa.plot13

8. Combine figures

library("ggpubr")
figure <- ggarrange(pcoa.plot12, pcoa.plot13,
                    labels = c("A", "B"),
                    ncol = 2, nrow = 1, common.legend = TRUE, legend="right", widths = c(1, 1))
figure

9. Add clusters to meta

Based on the PCoAs, I decided to cluster samples within each management types into topsoil (0-20 cm), subsoil (20-30 cm) and deep soil (30-80 cm)

Add the above categories to our samples

meta_nf$cluster <- NA

meta_nf$cluster[meta_nf$depth_numerical<20 & meta_nf$sample_type=="meadow"]<-"topsoil_meadow"
meta_nf$cluster[meta_nf$depth_numerical<20 & meta_nf$sample_type=="organic"]<-"topsoil_organic"
meta_nf$cluster[meta_nf$depth_numerical<20 & meta_nf$sample_type=="conventional"]<-"topsoil_conventional"

meta_nf$cluster[meta_nf$depth_numerical==25 & meta_nf$sample_type=="meadow"]<-"subsoil_meadow"
meta_nf$cluster[meta_nf$depth_numerical==25 & meta_nf$sample_type=="organic"]<-"subsoil_organic"
meta_nf$cluster[meta_nf$depth_numerical==25 & meta_nf$sample_type=="conventional"]<-"subsoil_conventional"


meta_nf$cluster[meta_nf$depth_numerical>30 & meta_nf$sample_type=="meadow"]<-"deepsoil_meadow"
meta_nf$cluster[meta_nf$depth_numerical>30 & meta_nf$sample_type=="organic"]<-"deepsoil_organic"
meta_nf$cluster[meta_nf$depth_numerical>30 & meta_nf$sample_type=="conventional"]<-"deepsoil_conventional"

cluster <- unique(meta_nf$cluster)
cluster
## [1] "topsoil_conventional"  "subsoil_conventional"  "deepsoil_conventional"
## [4] "topsoil_meadow"        "subsoil_meadow"        "deepsoil_meadow"      
## [7] "topsoil_organic"       "subsoil_organic"       "deepsoil_organic"
# add the new meta_nf to ps_RA_nf_genus_pruned
sample_data(ps_RA_nf_genus_pruned) <- sample_data(meta_nf)

10. Test abundance differences

lets test separately for the ones that make sense, for example, no point testing between organic topsoil and conventional deep, but rather topsoil for both soil types and then within organic between top and deep

NOTE! At the end of chunk I do p-value adjustment (“BH”) for all comparisons in the chunk

10.1. topsoil all soil types

library("data.table")
library("rstatix")

subset <- subset_samples(ps_RA_nf_genus_pruned, cluster=="topsoil_meadow" | cluster=="topsoil_conventional" | cluster=="topsoil_organic")
subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 95 taxa and 50 samples ]
## sample_data() Sample Data:       [ 50 samples by 30 sample variables ]
## tax_table()   Taxonomy Table:    [ 95 taxa by 1 taxonomic ranks ]
#create data table
melt_df <-  psmelt(subset)
# Make cluster into a factor
melt_df$cluster <- factor(melt_df$cluster)

pval.list <- list()

for (i in all_top20_unique)
{
  # data.frame with the selected taxonomic group
  df <- filter(melt_df, genus==i)
  x <- pairwise.wilcox.test(df$Abundance, df$cluster,
                            p.adjust.method = NULL)
  x <- as.data.frame(x[["p.value"]])
  x$genus <- i
  x$comparison <- rownames(x)
  pval.list[[i]] <- x
}

pval.list_df1 <- rbindlist(pval.list)
pval.list_df1 <- as.data.frame(pval.list_df1)
# make into long format: where the new column  called "vs" contains the sample cluster comparison, and new column "p_value" contains the p_values
pval.list_df1 <- gather(pval.list_df1, vs, p_value, 1:2, factor_key=TRUE)
# adjust p values
pval.list_df1 <- adjust_pvalue(pval.list_df1, p.col = "p_value", output.col = "adj_p", method = "BH")

10.2. subsoil all soil types

subset <- subset_samples(ps_RA_nf_genus_pruned, cluster=="subsoil_meadow" | cluster=="subsoil_conventional" | cluster=="subsoil_organic")
subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 95 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 30 sample variables ]
## tax_table()   Taxonomy Table:    [ 95 taxa by 1 taxonomic ranks ]
#create data table
melt_df <-  psmelt(subset)

pval.list <- list()

for (i in all_top20_unique)
{
  # data.frame with the selected taxonomic group
  df <- filter(melt_df, genus==i)
  x <- pairwise.wilcox.test(df$Abundance, df$cluster,
                            p.adjust.method = "BH")
  x <- as.data.frame(x[["p.value"]])
  x$genus <- i
  x$comparison <- rownames(x)
  pval.list[[i]] <- x
}

pval.list_df7 <- rbindlist(pval.list)
pval.list_df7 <- as.data.frame(pval.list_df7)
# make into long format: where the new column  called "vs" contains the sample cluster comparison, and new column "p_value" contains the p_values
pval.list_df7 <- gather(pval.list_df7, vs, p_value, 1:2, factor_key=TRUE)
# adjust p values
pval.list_df7 <- adjust_pvalue(pval.list_df7, p.col = "p_value", output.col = "adj_p", method = "BH")

10.3. deepsoil all soil types

subset <- subset_samples(ps_RA_nf_genus_pruned, cluster=="deepsoil_meadow" | cluster=="deepsoil_conventional" | cluster=="deepsoil_organic")
subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 95 taxa and 50 samples ]
## sample_data() Sample Data:       [ 50 samples by 30 sample variables ]
## tax_table()   Taxonomy Table:    [ 95 taxa by 1 taxonomic ranks ]
#create data table
melt_df <-  psmelt(subset)

pval.list <- list()

for (i in all_top20_unique)
{
  # data.frame with the selected taxonomic group
  df <- filter(melt_df, genus==i)
  x <- pairwise.wilcox.test(df$Abundance, df$cluster,
                            p.adjust.method = "BH")
  x <- as.data.frame(x[["p.value"]])
  x$genus <- i
  x$comparison <- rownames(x)
  pval.list[[i]] <- x
}

pval.list_df2 <- rbindlist(pval.list)
pval.list_df2 <- as.data.frame(pval.list_df2)
# make into long format: where the new column  called "vs" contains the sample cluster comparison, and new column "p_value" contains the p_values
pval.list_df2 <- gather(pval.list_df2, vs, p_value, 1:2, factor_key=TRUE)
# adjust p values
pval.list_df2 <- adjust_pvalue(pval.list_df2, p.col = "p_value", output.col = "adj_p", method = "BH")

10.4. only organic

subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type=="organic")
subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 95 taxa and 40 samples ]
## sample_data() Sample Data:       [ 40 samples by 30 sample variables ]
## tax_table()   Taxonomy Table:    [ 95 taxa by 1 taxonomic ranks ]
#create data table
melt_df <-  psmelt(subset)

pval.list <- list()

for (i in all_top20_unique)
{
  # data.frame with the selected taxonomic group
  df <- filter(melt_df, genus==i)
  x <- pairwise.wilcox.test(df$Abundance, df$cluster,
                            p.adjust.method = "BH")
  x <- as.data.frame(x[["p.value"]])
  x$genus <- i
  x$comparison <- rownames(x)
  pval.list[[i]] <- x
}

pval.list_df3 <- rbindlist(pval.list)
pval.list_df3 <- as.data.frame(pval.list_df3)
# make into long format: where the new column  called "vs" contains the sample cluster comparison, and new column "p_value" contains the p_values
pval.list_df3 <- gather(pval.list_df3, vs, p_value, 1:2, factor_key=TRUE)
# adjust p values
pval.list_df3 <- adjust_pvalue(pval.list_df3, p.col = "p_value", output.col = "adj_p", method = "BH")

10.5. only conventional

subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type=="conventional")
subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 95 taxa and 45 samples ]
## sample_data() Sample Data:       [ 45 samples by 30 sample variables ]
## tax_table()   Taxonomy Table:    [ 95 taxa by 1 taxonomic ranks ]
#create data table
melt_df <-  psmelt(subset)

pval.list <- list()

for (i in all_top20_unique)
{
  # data.frame with the selected taxonomic group
  df <- filter(melt_df, genus==i)
  x <- pairwise.wilcox.test(df$Abundance, df$cluster,
                            p.adjust.method = "BH")
  x <- as.data.frame(x[["p.value"]])
  x$genus <- i
  x$comparison <- rownames(x)
  pval.list[[i]] <- x
}

pval.list_df4 <- rbindlist(pval.list)
pval.list_df4 <- as.data.frame(pval.list_df4)
# make into long format: where the new column  called "vs" contains the sample cluster comparison, and new column "p_value" contains the p_values
pval.list_df4 <- gather(pval.list_df4, vs, p_value, 1:2, factor_key=TRUE)
# adjust p values
pval.list_df4 <- adjust_pvalue(pval.list_df4, p.col = "p_value", output.col = "adj_p", method = "BH")

10.6. only meadow

subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type=="meadow")
subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 95 taxa and 40 samples ]
## sample_data() Sample Data:       [ 40 samples by 30 sample variables ]
## tax_table()   Taxonomy Table:    [ 95 taxa by 1 taxonomic ranks ]
#create data table
melt_df <-  psmelt(subset)

pval.list <- list()

for (i in all_top20_unique)
{
  # data.frame with the selected taxonomic group
  df <- filter(melt_df, genus==i)
  x <- pairwise.wilcox.test(df$Abundance, df$cluster,
                            p.adjust.method = "BH")
  x <- as.data.frame(x[["p.value"]])
  x$genus <- i
  x$comparison <- rownames(x)
  pval.list[[i]] <- x
}

pval.list_df5 <- rbindlist(pval.list)
pval.list_df5 <- as.data.frame(pval.list_df5)
# make into long format: where the new column  called "vs" contains the sample cluster comparison, and new column "p_value" contains the p_values
pval.list_df5 <- gather(pval.list_df5, vs, p_value, 1:2, factor_key=TRUE)
# adjust p values
pval.list_df5 <- adjust_pvalue(pval.list_df5, p.col = "p_value", output.col = "adj_p", method = "BH")

11. combine all p-values

all.pvals <- rbind(pval.list_df1, pval.list_df2, pval.list_df3, pval.list_df4, pval.list_df5, pval.list_df7)

all.sig.pvals <- all.pvals[all.pvals$adj_p <= 0.05, ]   

write.csv2(all.sig.pvals, file = "\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS\\HEATMAP_sig_wilcox_p_values.csv", row.names = FALSE)

all.sig.genus <- unique(all.sig.pvals$genus)
length(all.sig.genus)
## [1] 78

12. Make ps with the significant taxa and get taxa mean and se values

# only keep sig

# filter by taxa name
ps_RA_nf_genus_pruned_sig <- prune_taxa(all.sig.genus, ps_RA_nf_genus_pruned)
ps_Heatmap <- ps_RA_nf_genus_pruned_sig
ps_Heatmap
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 77 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 30 sample variables ]
## tax_table()   Taxonomy Table:    [ 77 taxa by 1 taxonomic ranks ]
rm(ps_RA_nf_genus_pruned_sig)

df <- psmelt(ps_Heatmap)

x <- df %>%
  group_by(OTU, sample_type, depth) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))

x
## # A tibble: 1,155 × 5
## # Groups:   OTU, sample_type [231]
##    OTU      sample_type depth         mean         se
##    <chr>    <fct>       <chr>        <dbl>      <dbl>
##  1 Acephala meadow      0...10  0          0         
##  2 Acephala meadow      10...20 0          0         
##  3 Acephala meadow      20...30 0.0000157  0.00000820
##  4 Acephala meadow      30...40 0.00601    0.00601   
##  5 Acephala meadow      40...   0.0343     0.0343    
##  6 Acephala organic     0...10  0.00000500 0.00000327
##  7 Acephala organic     10...20 0.00000534 0.00000270
##  8 Acephala organic     20...30 0.00000762 0.00000571
##  9 Acephala organic     30...40 0          0         
## 10 Acephala organic     40...   0          0         
## # ℹ 1,145 more rows
write.csv2(x, file = "\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS\\HEATMAP_sig_GENERA_mean_and_se.csv", row.names = FALSE)

13. MAKE HEATMAP

13.1. Add FUNGuild annotation for HEATMAP

13.1.1. Build FUNGuild

I need to make a separate funguild phyloseq for heatmap, where I have only the genus and higher level annotation (no species level)

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\YONI_ITS_analyses\\R_ITS_yoni\\Analyses_final\\RE_ANNOTATION_2024')

FG <- read.csv2("FUNGuild_31_05_2024.csv")

# what different levels there are:
unique(FG$taxonomicLevel)
## [1] "Genus"      "Species"    "Variety"    "Family"     "Order"     
## [6] "Phylum"     "Form"       "Subspecies"

I will get the annotations from genus and higher tax levels:

Genus

fg <- FG[FG$taxonomicLevel == "Genus", ]      
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)

# merge tables
colnames(fg)[1] <- "genus"
FG_tax_table <- merge(tax_table, fg, by = "genus", all.x = TRUE)


# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA

# only keep the OTU, TrophicMode and guild
# and OTU as row names

FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]

# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")

# change column names
colnames(FG_tax_table)[1] <- "trophicMode_gen"
colnames(FG_tax_table)[2] <- "guild_gen"

# save with new name
FUNGuild_gen <- FG_tax_table

Family

fg <- FG[FG$taxonomicLevel == "Family", ]      
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)

# merge tables
colnames(fg)[1] <- "family"
FG_tax_table <- merge(tax_table, fg, by = "family", all.x = TRUE)

# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA

# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]

# and taxon as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")

# change column names
colnames(FG_tax_table)[1] <- "trophicMode_fam"
colnames(FG_tax_table)[2] <- "guild_fam"

# save with new name
FUNGuild_fam <- FG_tax_table

Order

fg <- FG[FG$taxonomicLevel == "Order", ]      
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)

# merge tables
colnames(fg)[1] <- "order"
FG_tax_table <- merge(tax_table, fg, by = "order", all.x = TRUE)

# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA

# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]

# and taxon as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")

# change column names
colnames(FG_tax_table)[1] <- "trophicMode_ord"
colnames(FG_tax_table)[2] <- "guild_ord"

# save with new name
FUNGuild_ord <- FG_tax_table

Phylum

fg <- FG[FG$taxonomicLevel == "Phylum", ]      
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)

# merge tables
colnames(fg)[1] <- "phylum"
FG_tax_table <- merge(tax_table, fg, by = "phylum", all.x = TRUE)

# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA

# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]

# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")

# change column names
colnames(FG_tax_table)[1] <- "trophicMode_phy"
colnames(FG_tax_table)[2] <- "guild_phy"

# save with new name
FUNGuild_phy <- FG_tax_table

Combine all annotations:Genus Family Order Phylum

x <- left_join(rownames_to_column(FUNGuild_gen), rownames_to_column(FUNGuild_fam), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_ord), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_phy), by = "rowname")
# get the value from another column if NA
y <- x %>%
  mutate(trophicMode_gen = coalesce(trophicMode_gen,trophicMode_fam))

# get the value from another column if NA
y <- y %>%
  mutate(guild_gen = coalesce(guild_gen,guild_fam))

# same for order
# get the value from another column if NA
y <- y %>%
  mutate(trophicMode_gen = coalesce(trophicMode_gen,trophicMode_ord))

# get the value from another column if NA
y <- y %>%
  mutate(guild_gen = coalesce(guild_gen,guild_ord))

# same for phylum
# get the value from another column if NA
y <- y %>%
  mutate(trophicMode_gen = coalesce(trophicMode_gen,trophicMode_phy))

# get the value from another column if NA
y <- y %>%
  mutate(guild_gen = coalesce(guild_gen,guild_phy))

# then rename the gen columns
colnames(y)[2] <- "trophicMode"
colnames(y)[3] <- "guild"


# remove the rest of the columns
y <- y[, -c(4:11)]

# rownames
y2 <- y[,-1]
rownames(y2) <- y[,1]
# Remove empty spaces and remove also "|"

y2$trophicMode <- gsub(" ", "", y2$trophicMode, fixed = TRUE)
y2$guild <- gsub("|", "", y2$guild, fixed = TRUE)
y3 <- left_join(rownames_to_column(y2), rownames_to_column(tax_table), by = "rowname")
row.names(y3) <- y3$rowname
y3 <- y3[, -1]

Define AMFs, Ectomycorrhizal and Plant pathogens

Here in FUNGuild column:

  • Ectomycorrhizal = guilds containing “Ectomycorrhizal” from trophic mode Symbiotroph only, NOTE! this is same as pure Ectomycorrhizal!!
  • Arbuscular Mycorrhizal = all guilds containing “Arbuscular Mycorrhizal” from trophic mode Symbiotroph (no AMF in other trophic modes)
  • Endophyte = Pure endophytes from trophic mode Symbiotroph only
  • Plant Pathogen = Pure Plant Pathogens from trophic mode Pathotroph only
z <- y3

z <- z %>%  
   mutate(FG = case_when(grepl("Ectomycorrhizal", guild) & trophicMode=="Symbiotroph" ~ "Ectomycorrhizal", grepl("Arbuscular", guild) ~ "Arbuscular Mycorrhizal", guild == "Endophyte" & trophicMode=="Symbiotroph" ~ "Endophyte", guild=="Plant Pathogen" & trophicMode=="Pathotroph"~ "Plant Pathogen"))
z <- z %>%
  mutate(FUNGuild = case_when(FG == "Ectomycorrhizal" |  FG == "Arbuscular Mycorrhizal" | FG == "Endophyte" | FG == "Plant Pathogen" ~ FG, guild != "Ectomycorrhizal" |  FG != "Arbuscular Mycorrhizal" |  FG != "Endophyte" | FG != "Plant Pathogen" ~ z$trophicMode))
FG_tax_table <- z

# remove the FG column

FG_tax_table <- FG_tax_table[, -c(11)]

Check the different written forms, if I have empty spaces?

unique(FG_tax_table$trophicMode)
##  [1] "Saprotroph"                        NA                                 
##  [3] "Pathotroph-Saprotroph"             "Saprotroph-Symbiotroph"           
##  [5] "Pathotroph"                        "Symbiotroph"                      
##  [7] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"           
##  [9] "Saprotroph-Saprotroph-Symbiotroph" "Pathotroph-Pathotroph-Saprotroph"
#unique(FG_tax_table$guild)
unique(FG_tax_table$FUNGuild)
##  [1] "Saprotroph"                        NA                                 
##  [3] "Pathotroph-Saprotroph"             "Saprotroph-Symbiotroph"           
##  [5] "Plant Pathogen"                    "Ectomycorrhizal"                  
##  [7] "Arbuscular Mycorrhizal"            "Pathotroph"                       
##  [9] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"           
## [11] "Symbiotroph"                       "Saprotroph-Saprotroph-Symbiotroph"
## [13] "Endophyte"                         "Pathotroph-Pathotroph-Saprotroph"
# change some names for FUNGuild
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Symbiotroph"] <- "Other Symbiotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Pathotroph"] <- "Other Pathotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Pathotroph-Pathotroph-Saprotroph"] <- "Pathotroph-Saprotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Saprotroph-Saprotroph-Symbiotroph"] <- "Saprotroph-Symbiotroph"

# and for trophicMode
FG_tax_table$trophicMode[FG_tax_table$trophicMode=="Pathotroph-Pathotroph-Saprotroph"] <- "Pathotroph-Saprotroph"
FG_tax_table$trophicMode[FG_tax_table$trophicMode=="Saprotroph-Saprotroph-Symbiotroph"] <- "Saprotroph-Symbiotroph"

# reorder
FG_tax_table <- FG_tax_table[, c(1:2, 11, 3:10)]

Check again

unique(FG_tax_table$trophicMode)
## [1] "Saprotroph"                        NA                                 
## [3] "Pathotroph-Saprotroph"             "Saprotroph-Symbiotroph"           
## [5] "Pathotroph"                        "Symbiotroph"                      
## [7] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"
#unique(FG_tax_table$guild)
unique(FG_tax_table$FUNGuild)
##  [1] "Saprotroph"                        NA                                 
##  [3] "Pathotroph-Saprotroph"             "Saprotroph-Symbiotroph"           
##  [5] "Plant Pathogen"                    "Ectomycorrhizal"                  
##  [7] "Arbuscular Mycorrhizal"            "Other Pathotroph"                 
##  [9] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"           
## [11] "Other Symbiotroph"                 "Endophyte"

13.1.2. Save ps_FG for HEATMAP

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

load('ps_FINAL')

ps_FG_HEATMAP <- phyloseq(otu_table(ps), tax_table(as.matrix(FG_tax_table)), sample_data(ps))
ps_FG_HEATMAP
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 11 taxonomic ranks ]
save(ps_FG_HEATMAP, file = 'ps_FG_for_HEATMAP')

13.1.3.

FG_tax <- as.data.frame(as.matrix(tax_table(ps_FG_HEATMAP)))

13.1.4. Modify Depth

ps_FG_HEATMAP_nf <- subset_samples(ps_FG_HEATMAP, sample_type!="forest")
ps_FG_HEATMAP_nf
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 11 taxonomic ranks ]
meta_nf <- meta(ps_FG_HEATMAP_nf)

meta_nf$new_depth <- meta_nf$depth

meta_nf$new_depth <- gsub("...", "-", meta_nf$new_depth, fixed = TRUE)

meta_nf$new_depth[meta_nf$new_depth=="40-"] <- "40-80"


meta_nf$new_depth[meta_nf$new_depth=="0-10"] <- "0-10 cm"
meta_nf$new_depth[meta_nf$new_depth=="10-20"] <- "10-20 cm"
meta_nf$new_depth[meta_nf$new_depth=="20-30"] <- "20-30 cm"
meta_nf$new_depth[meta_nf$new_depth=="30-40"] <- "30-40 cm"
meta_nf$new_depth[meta_nf$new_depth=="40-80"] <- "40-80 cm"

13.1.5.Transform RA and z scaled

ps_genus <- aggregate_rare(ps, level = 'genus', detection = 0/100, prevalence = 0/100)
ps_genus
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 943 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 943 taxa by 1 taxonomic ranks ]
# subset
ps_genus_NO_FOREST <- subset_samples(ps_genus, sample_type!="forest")
ps_genus_NO_FOREST
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 943 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 943 taxa by 1 taxonomic ranks ]
meta_nf <- meta(ps_genus_NO_FOREST)

# RA-transform
ps_genus_NO_FOREST_RA <- microbiome::transform(ps_genus_NO_FOREST, 'compositional')

# Z-transform
ps_genus_NO_FOREST_RA_z <- microbiome::transform(ps_genus_NO_FOREST_RA, 'Z', log10 = FALSE)

# subset to only the 77 sig genus from above

hetamap.taxa <- taxa_names(ps_Heatmap)
data <- as(otu_table(ps_genus_NO_FOREST_RA_z), "matrix")
data <- as.data.frame(data)
data_subset <- data[(row.names(data) %in% hetamap.taxa),]
# now only 77 genuses, as should be
data_subset <- as.matrix(data_subset)

# add annotations "depth" and "soil management"
my_sample_col <- data.frame(meta_nf[c("depth", "sample_type")], row.names = row.names(meta_nf))
colnames(my_sample_col) <- c("depth", "soil management")


x <- as.data.frame(t(data_subset))
y <- as.data.frame(my_sample_col)
colnames(y) <- c("depth", "sample_type")
z <- dplyr::left_join(rownames_to_column(x), rownames_to_column(y), by=c("rowname" = "rowname"))
colnames(z)[1] <- "ID"

f <- z %>%
  group_by(sample_type, depth) %>%
  summarise_all(mean)

# remove ID column
f <- f[ , -3]

# make new ID column, which is the soiltypedepth
library(stringr)
f$ID <- str_c(f$sample_type, '_', f$depth)
# make new df with just sample type, depth and sampletypedepth

df <- f[c("sample_type", "depth", "ID")]
# remove extra columns from f
f2 <- f[, -c(1, 2, 80)]
# column into rownames
rownames(f2) <- f$ID
# make into numeric matrix
f3 <- data.matrix(f2, rownames.force = NA)
f4 <- t(f3)

# same for df
# remove extra columns from df
df2 <- df[ , -c(3)]
# column into rownames
rownames(df2) <- df$ID
my_sample_col2 <- df2
my_sample_col3 <- as.data.frame(my_sample_col2)
rownames(my_sample_col3) <- df$ID
# lets add annotations of samples

# add annotations "depth" and "soil_type" and change order
colnames(my_sample_col3) <- c("soil management", "depth")
my_sample_col3 <- my_sample_col3[, c(2,1)]

13.1.6 Finally plot HEATMAP

library("pheatmap")
library("ggplotify")

# only keep the genus and FUNGuild
x <- FG_tax[, c(9, 3)]
# Remove duplicates by single column
FUNGuild_tax_table <- x[!duplicated(x$genus), ]
dim(FUNGuild_tax_table)
## [1] 943   2
# and genus as row names
rownames(FUNGuild_tax_table) <- NULL
FUNGuild_tax_table <- column_to_rownames(FUNGuild_tax_table, var = "genus")

# view data frame
unique(FUNGuild_tax_table$FUNGuild)
##  [1] NA                                  "Saprotroph-Symbiotroph"           
##  [3] "Arbuscular Mycorrhizal"            "Pathotroph-Saprotroph-Symbiotroph"
##  [5] "Pathotroph-Saprotroph"             "Saprotroph"                       
##  [7] "Endophyte"                         "Ectomycorrhizal"                  
##  [9] "Other Pathotroph"                  "Plant Pathogen"                   
## [11] "Other Symbiotroph"                 "Pathotroph-Symbiotroph"
FUNGuild_tax_table$FUNGuild <- as.factor(FUNGuild_tax_table$FUNGuild)

# change level order
FUNGuild_tax_table$FUNGuild <- factor(FUNGuild_tax_table$FUNGuild, levels = c("Plant Pathogen", "Other Pathotroph", "Pathotroph-Saprotroph", "Pathotroph-Saprotroph-Symbiotroph", "Pathotroph-Symbiotroph", "Saprotroph", "Saprotroph-Symbiotroph", "Other Symbiotroph", "Ectomycorrhizal", "Endophyte", "Arbuscular Mycorrhizal"))
levels(FUNGuild_tax_table$FUNGuild)
##  [1] "Plant Pathogen"                    "Other Pathotroph"                 
##  [3] "Pathotroph-Saprotroph"             "Pathotroph-Saprotroph-Symbiotroph"
##  [5] "Pathotroph-Symbiotroph"            "Saprotroph"                       
##  [7] "Saprotroph-Symbiotroph"            "Other Symbiotroph"                
##  [9] "Ectomycorrhizal"                   "Endophyte"                        
## [11] "Arbuscular Mycorrhizal"
#Create color palette

my_colour = list(
  "soil management" = c(meadow = "#fbc02d", organic = "#8a8a8a", conventional =  "#b71c1c"),
  depth = c('0...10' = "#387212", '10...20' = "#ADC476", '20...30' = "#D8D2BA",'30...40' = "#907852", '40...' = "#6A4C3A"),
  FUNGuild = c("Plant Pathogen" = "deeppink", "Other Pathotroph" = "#d6849a", "Pathotroph-Saprotroph" = "#e3adbc", "Pathotroph-Saprotroph-Symbiotroph" = "#f1d6dd", "Pathotroph-Symbiotroph" = "#faf1f4", "Saprotroph" = "#CBBEAD", "Saprotroph-Symbiotroph" = "darkseagreen", "Other Symbiotroph" = "lightgreen", "Ectomycorrhizal" = "darkgreen", "Endophyte" = "#A2CF31", "Arbuscular Mycorrhizal" = "darkolivegreen1"))

p2 <- as.ggplot(function() pheatmap(f4, cluster_cols = FALSE, cluster_rows = TRUE, annotation_col = my_sample_col3, annotation_colors = my_colour, color=colorRampPalette(c("navy", "white", "red"))(50), show_colnames = FALSE, legend = TRUE, annotation_row = FUNGuild_tax_table, border_color = NA, cellheight = 16, fontsize = 14, fontsize_row = 14, annotation_names_row = FALSE, annotation_names_col = FALSE))

B) HEATMAP for FOREST ONLY fungal genera no filtering by significant genera

1. load packages and data

library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")


setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
# Pick relative abundances (compositional) and sample metadata 
ps_RA <- microbiome::transform(ps, "compositional")
ps_RA_nf <- subset_samples(ps_RA, sample_type=="forest")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_nf),
              MARGIN = ifelse(taxa_are_rows(ps_RA_nf), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_RA_nf = prune_taxa((prev0 > 0), ps_RA_nf)
ps_RA_nf
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 5398 taxa and 15 samples ]
## sample_data() Sample Data:       [ 15 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 5398 taxa by 7 taxonomic ranks ]
meta_nf <- meta(ps_RA_nf)

2. Overview of the heatmap analysis

I will use RA values For the final heatmap figure the RA values will be standardizes (z-transformed)

  1. First pick the 10 most abundant genera in each soil type depth
  2. Make a HEATMAP of the most abundant taxa only

3. edit unclassified taxa at genus level

# make a dataframe of the tax-table
tax <- as.data.frame(ps_RA@tax_table)
# calculate how many genus
length(unique(tax$genus))
## [1] 943
tax$genus <- sub(".*_.*", "Unclassified_genus",tax$genus)
# calculate how many genus
length(unique(tax$genus))
## [1] 710
# of which 1 is "Unclassified_genus" 

# edit the phyloseq object
ps_genus <- ps
tax <- as.matrix(tax) # convert it into a matrix.
tax <- tax_table(tax) # convert into phyloseq compatible file.
tax_table(ps_genus) <- tax # incroporate into phyloseq Object

ps_genus <- aggregate_rare(ps_genus, level = 'genus', detection = 0/100, prevalence = 0/140)
ps_genus
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 710 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 710 taxa by 2 taxonomic ranks ]
ps_genus_RA <- microbiome::transform(ps_genus, "compositional")

# remove unclassified
allTaxa = taxa_names(ps_genus_RA)
badTaxa = c("Unclassified_genus")
myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
ps_genus_RA_pruned <- prune_taxa(myTaxa, ps_genus_RA)
ps_genus_RA_pruned
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 709 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 709 taxa by 2 taxonomic ranks ]

4. Pick 10 most abund taxa in each forest soil layer

# sample wise filtering according to most abund. genera

# Initialize an empty list to store the taxa
abund.taxa <- list()

for (i in meta_nf$sample_type) {
  for (j in meta_nf$depth) {
    x <- sample_names(sample_data(ps_genus_RA_pruned)[sample_data(ps_genus_RA_pruned)$sample_type == i & sample_data(ps_genus_RA_pruned)$depth == j,])

# Calculate taxa mean of the selected samples
top10 <- head(sort(rowMeans(otu_table(ps_genus_RA_pruned)[,x]), decreasing = TRUE), 10)

result_name <- paste("sample_type", i, "depth", j, sep = "_")

abund.taxa[[result_name]] <- top10
  }
}

5. Combine picked taxa

management_layer <- c(names(abund.taxa))

all_top10 <- c()

for (i in management_layer) {
  top10 <- c(names(abund.taxa[[i]]))
  all_top10 <- c(all_top10, top10)

}

all_top10_unique <- unique(all_top10)
length(all_top10_unique)
## [1] 30

6. Make a phyloseq of picked taxa

# first aggregate to genus
ps_RA_nf_genus <- aggregate_rare(ps_RA_nf, level = "genus", detection = 0/140, prevalence = 0/100)

# filter by taxa name
ps_RA_nf_genus_pruned <- prune_taxa(all_top10_unique, ps_RA_nf_genus)
ps_RA_nf_genus_pruned
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 30 taxa and 15 samples ]
## sample_data() Sample Data:       [ 15 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 30 taxa by 1 taxonomic ranks ]
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

ps_genus_FOREST_ONLY_HETAMAP <- ps_RA_nf_genus_pruned
save(ps_genus_FOREST_ONLY_HETAMAP, file='ps_genus_nf_HETAMAP_all_top10_FOREST_ONLY')

7. PCoA

OTU = as(otu_table(ps_RA_nf_genus_pruned), "matrix")
# transpose if necessary
if(taxa_are_rows(ps_RA_nf_genus_pruned)){OTU <- t(OTU)}
# Coerce to data.frame
OTU = as.data.frame(OTU)
OTU <- as.matrix(OTU)

bray_dist <- vegan::vegdist(OTU, method="bray")
str(bray_dist)
##  'dist' Named num [1:105] 0.804 0.919 0.941 0.999 0.736 ...
##  - attr(*, "maxdist")= num 1
##  - attr(*, "Size")= int 15
##  - attr(*, "Labels")= chr [1:15] "M1_0to10" "M1_10to20" "M1_20to30" "M1_30to40" ...
##  - attr(*, "Diag")= logi FALSE
##  - attr(*, "Upper")= logi FALSE
##  - attr(*, "method")= chr "bray"
##  - attr(*, "call")= language vegan::vegdist(x = OTU, method = "bray")
pcoa <- cmdscale(bray_dist, eig=TRUE, k = 3)
# To plot the output from the mds using ggplot a new datasheet needs to be created which contains the x,y points for each site. You can do this by calling the scores of you mds.
site.scrs <- as.data.frame(scores(pcoa, display = "sites")) #save pcoa results into dataframe
site.scrs <- cbind(site.scrs, management_type = meta_nf$sample_type) #add grouping variable "management_type" to dataframe
site.scrs <- cbind(site.scrs, depth = meta_nf$depth) #add grouping variable of depth

head(site.scrs)
##                 Dim1        Dim2        Dim3 management_type   depth
## M1_0to10   0.3123094 -0.07176912  0.25664796          forest  0...10
## M1_10to20  0.3524951 -0.09107174 -0.17221039          forest 10...20
## M1_20to30  0.2488208  0.08019621 -0.32338694          forest 20...30
## M1_30to40 -0.3062000  0.05421002 -0.10232306          forest 30...40
## M1_40to60 -0.3767020 -0.33420005  0.01743208          forest   40...
## M2_0to10   0.4386126 -0.08676279  0.11762377          forest  0...10
MyPalette <- c(forest = "#1167b1", meadow = "#fbc02d", organic = "#8a8a8a", conventional =  "#b71c1c")

7.1. axes 1 and 2

first get axis %

GP.ord <- ordinate(ps_RA_nf_genus_pruned, "PCoA", "bray")
pord = plot_ordination(ps_RA_nf_genus_pruned, GP.ord, type="samples", color="sample_type", shape="depth")
pord

Change axis percentages accordingly!

pcoa.plot12 <- ggplot() + geom_point(data=site.scrs, aes(Dim1, Dim2, colour = factor(site.scrs$management_type), shape = factor(site.scrs$depth)), size = 5, alpha = 0.7) + theme_cowplot() + theme(panel.background = element_rect(fill = NA, colour = "black", size = 1, linetype = "solid")) + labs(colour = "", shape = "") + theme(legend.text = element_text(size = 12), axis.text = element_text(size = 16)) + scale_colour_manual(values = MyPalette)  + labs(y = "PC2 (18.8%)", x = "PC1 (28.5%)")


pcoa.plot12

7.2. axes 1 and 3

first get axis %

GP.ord <- ordinate(ps_RA_nf_genus_pruned, "PCoA", "bray", k = 3)
pord = plot_ordination(ps_RA_nf_genus_pruned, GP.ord, type="samples", axes = c(1, 3), color="sample_type", shape="depth")
pord

Change axis percentages accordingly!

pcoa.plot13 <- ggplot() + geom_point(data=site.scrs, aes(Dim1, Dim3, colour = factor(site.scrs$management_type), shape = factor(site.scrs$depth)), size = 5, alpha = 0.7) + theme_cowplot() + theme(panel.background = element_rect(fill = NA, colour = "black", size = 1, linetype = "solid")) + labs(colour = "", shape = "") + theme(legend.text = element_text(size = 13), axis.text = element_text(size = 16)) + scale_colour_manual(values = MyPalette) + labs(y = "PC3 (11.0%)", x = "PC1 (28.5%)")


pcoa.plot13

8. Combine figures

library("ggpubr")
figure <- ggarrange(pcoa.plot12, pcoa.plot13,
                    labels = c("A", "B"),
                    ncol = 2, nrow = 1, common.legend = TRUE, legend="right", widths = c(1, 1))
figure

9. Get taxa mean and se values

ps_Heatmap <- ps_genus_FOREST_ONLY_HETAMAP
ps_Heatmap
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 30 taxa and 15 samples ]
## sample_data() Sample Data:       [ 15 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 30 taxa by 1 taxonomic ranks ]
df <- psmelt(ps_Heatmap)

x <- df %>%
  group_by(OTU, sample_type, depth) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))

x
## # A tibble: 150 × 5
## # Groups:   OTU, sample_type [30]
##    OTU      sample_type depth        mean        se
##    <chr>    <fct>       <chr>       <dbl>     <dbl>
##  1 Botrytis forest      0...10  0.0000325 0.0000325
##  2 Botrytis forest      10...20 0.0000800 0.0000800
##  3 Botrytis forest      20...30 0         0        
##  4 Botrytis forest      30...40 0         0        
##  5 Botrytis forest      40...   0.163     0.163    
##  6 Chalara  forest      0...10  0.0177    0.0156   
##  7 Chalara  forest      10...20 0.00361   0.00175  
##  8 Chalara  forest      20...30 0.000330  0.000167 
##  9 Chalara  forest      30...40 0.0000708 0.0000708
## 10 Chalara  forest      40...   0         0        
## # ℹ 140 more rows
write.csv2(x, file = "\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS\\HEATMAP_sig_GENERA_mean_and_se_FOREST_ONLY_all_10_most_abund.csv", row.names = FALSE)

13. MAKE HEATMAP

13.1. Add FUNGuild annotation for HEATMAP

13.1.1. Build FUNGuild

I need to make a separate funguild phyloseq for heatmap, where I have only the genus and higher level annotation (no species level)

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\YONI_ITS_analyses\\R_ITS_yoni\\Analyses_final\\RE_ANNOTATION_2024')

FG <- read.csv2("FUNGuild_31_05_2024.csv")

# what different levels there are:
unique(FG$taxonomicLevel)
## [1] "Genus"      "Species"    "Variety"    "Family"     "Order"     
## [6] "Phylum"     "Form"       "Subspecies"

I will get the annotations from genus and higher tax levels:

Genus

fg <- FG[FG$taxonomicLevel == "Genus", ]      
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)

# merge tables
colnames(fg)[1] <- "genus"
FG_tax_table <- merge(tax_table, fg, by = "genus", all.x = TRUE)


# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA

# only keep the OTU, TrophicMode and guild
# and OTU as row names

FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]

# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")

# change column names
colnames(FG_tax_table)[1] <- "trophicMode_gen"
colnames(FG_tax_table)[2] <- "guild_gen"

# save with new name
FUNGuild_gen <- FG_tax_table

Family

fg <- FG[FG$taxonomicLevel == "Family", ]      
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)

# merge tables
colnames(fg)[1] <- "family"
FG_tax_table <- merge(tax_table, fg, by = "family", all.x = TRUE)

# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA

# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]

# and taxon as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")

# change column names
colnames(FG_tax_table)[1] <- "trophicMode_fam"
colnames(FG_tax_table)[2] <- "guild_fam"

# save with new name
FUNGuild_fam <- FG_tax_table

Order

fg <- FG[FG$taxonomicLevel == "Order", ]      
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)

# merge tables
colnames(fg)[1] <- "order"
FG_tax_table <- merge(tax_table, fg, by = "order", all.x = TRUE)

# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA

# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]

# and taxon as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")

# change column names
colnames(FG_tax_table)[1] <- "trophicMode_ord"
colnames(FG_tax_table)[2] <- "guild_ord"

# save with new name
FUNGuild_ord <- FG_tax_table

Phylum

fg <- FG[FG$taxonomicLevel == "Phylum", ]      
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)

# merge tables
colnames(fg)[1] <- "phylum"
FG_tax_table <- merge(tax_table, fg, by = "phylum", all.x = TRUE)

# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA

# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]

# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")

# change column names
colnames(FG_tax_table)[1] <- "trophicMode_phy"
colnames(FG_tax_table)[2] <- "guild_phy"

# save with new name
FUNGuild_phy <- FG_tax_table

Combine all annotations:Genus Family Order Phylum

x <- left_join(rownames_to_column(FUNGuild_gen), rownames_to_column(FUNGuild_fam), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_ord), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_phy), by = "rowname")
# get the value from another column if NA
y <- x %>%
  mutate(trophicMode_gen = coalesce(trophicMode_gen,trophicMode_fam))

# get the value from another column if NA
y <- y %>%
  mutate(guild_gen = coalesce(guild_gen,guild_fam))

# same for order
# get the value from another column if NA
y <- y %>%
  mutate(trophicMode_gen = coalesce(trophicMode_gen,trophicMode_ord))

# get the value from another column if NA
y <- y %>%
  mutate(guild_gen = coalesce(guild_gen,guild_ord))

# same for phylum
# get the value from another column if NA
y <- y %>%
  mutate(trophicMode_gen = coalesce(trophicMode_gen,trophicMode_phy))

# get the value from another column if NA
y <- y %>%
  mutate(guild_gen = coalesce(guild_gen,guild_phy))

# then rename the gen columns
colnames(y)[2] <- "trophicMode"
colnames(y)[3] <- "guild"


# remove the rest of the columns
y <- y[, -c(4:11)]

# rownames
y2 <- y[,-1]
rownames(y2) <- y[,1]
# Remove empty spaces and remove also "|"

y2$trophicMode <- gsub(" ", "", y2$trophicMode, fixed = TRUE)
y2$guild <- gsub("|", "", y2$guild, fixed = TRUE)
y3 <- left_join(rownames_to_column(y2), rownames_to_column(tax_table), by = "rowname")
row.names(y3) <- y3$rowname
y3 <- y3[, -1]

Define AMFs, Ectomycorrhizal and Plant pathogens

Here in FUNGuild column:

  • Ectomycorrhizal = guilds containing “Ectomycorrhizal” from trophic mode Symbiotroph only, NOTE! this is same as pure Ectomycorrhizal!!
  • Arbuscular Mycorrhizal = all guilds containing “Arbuscular Mycorrhizal” from trophic mode Symbiotroph (no AMF in other trophic modes)
  • Endophyte = Pure endophytes from trophic mode Symbiotroph only
  • Plant Pathogen = Pure Plant Pathogens from trophic mode Pathotroph only
z <- y3

z <- z %>%  
   mutate(FG = case_when(grepl("Ectomycorrhizal", guild) & trophicMode=="Symbiotroph" ~ "Ectomycorrhizal", grepl("Arbuscular", guild) ~ "Arbuscular Mycorrhizal", guild == "Endophyte" & trophicMode=="Symbiotroph" ~ "Endophyte", guild=="Plant Pathogen" & trophicMode=="Pathotroph"~ "Plant Pathogen"))
z <- z %>%
  mutate(FUNGuild = case_when(FG == "Ectomycorrhizal" |  FG == "Arbuscular Mycorrhizal" | FG == "Endophyte" | FG == "Plant Pathogen" ~ FG, guild != "Ectomycorrhizal" |  FG != "Arbuscular Mycorrhizal" |  FG != "Endophyte" | FG != "Plant Pathogen" ~ z$trophicMode))
FG_tax_table <- z

# remove the FG column

FG_tax_table <- FG_tax_table[, -c(11)]

Check the different written forms, if I have empty spaces?

unique(FG_tax_table$trophicMode)
##  [1] "Saprotroph"                        NA                                 
##  [3] "Pathotroph-Saprotroph"             "Saprotroph-Symbiotroph"           
##  [5] "Pathotroph"                        "Symbiotroph"                      
##  [7] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"           
##  [9] "Saprotroph-Saprotroph-Symbiotroph" "Pathotroph-Pathotroph-Saprotroph"
#unique(FG_tax_table$guild)
unique(FG_tax_table$FUNGuild)
##  [1] "Saprotroph"                        NA                                 
##  [3] "Pathotroph-Saprotroph"             "Saprotroph-Symbiotroph"           
##  [5] "Plant Pathogen"                    "Ectomycorrhizal"                  
##  [7] "Arbuscular Mycorrhizal"            "Pathotroph"                       
##  [9] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"           
## [11] "Symbiotroph"                       "Saprotroph-Saprotroph-Symbiotroph"
## [13] "Endophyte"                         "Pathotroph-Pathotroph-Saprotroph"
# change some names for FUNGuild
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Symbiotroph"] <- "Other Symbiotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Pathotroph"] <- "Other Pathotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Pathotroph-Pathotroph-Saprotroph"] <- "Pathotroph-Saprotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Saprotroph-Saprotroph-Symbiotroph"] <- "Saprotroph-Symbiotroph"

# and for trophicMode
FG_tax_table$trophicMode[FG_tax_table$trophicMode=="Pathotroph-Pathotroph-Saprotroph"] <- "Pathotroph-Saprotroph"
FG_tax_table$trophicMode[FG_tax_table$trophicMode=="Saprotroph-Saprotroph-Symbiotroph"] <- "Saprotroph-Symbiotroph"

# reorder
FG_tax_table <- FG_tax_table[, c(1:2, 11, 3:10)]

Check again

unique(FG_tax_table$trophicMode)
## [1] "Saprotroph"                        NA                                 
## [3] "Pathotroph-Saprotroph"             "Saprotroph-Symbiotroph"           
## [5] "Pathotroph"                        "Symbiotroph"                      
## [7] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"
#unique(FG_tax_table$guild)
unique(FG_tax_table$FUNGuild)
##  [1] "Saprotroph"                        NA                                 
##  [3] "Pathotroph-Saprotroph"             "Saprotroph-Symbiotroph"           
##  [5] "Plant Pathogen"                    "Ectomycorrhizal"                  
##  [7] "Arbuscular Mycorrhizal"            "Other Pathotroph"                 
##  [9] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"           
## [11] "Other Symbiotroph"                 "Endophyte"

13.1.2. Save ps_FG for HEATMAP

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

load('ps_FINAL')

ps_FG_HEATMAP <- phyloseq(otu_table(ps), tax_table(as.matrix(FG_tax_table)), sample_data(ps))
ps_FG_HEATMAP
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 11 taxonomic ranks ]

13.1.3.

FG_tax <- as.data.frame(as.matrix(tax_table(ps_FG_HEATMAP)))

13.1.4. Modify Depth

ps_FG_HEATMAP_nf <- subset_samples(ps_FG_HEATMAP, sample_type=="forest")
ps_FG_HEATMAP_nf
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 15 samples ]
## sample_data() Sample Data:       [ 15 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 11 taxonomic ranks ]
meta_nf <- meta(ps_FG_HEATMAP_nf)

meta_nf$new_depth <- meta_nf$depth

meta_nf$new_depth <- gsub("...", "-", meta_nf$new_depth, fixed = TRUE)

meta_nf$new_depth[meta_nf$new_depth=="40-"] <- "40-80"


meta_nf$new_depth[meta_nf$new_depth=="0-10"] <- "0-10 cm"
meta_nf$new_depth[meta_nf$new_depth=="10-20"] <- "10-20 cm"
meta_nf$new_depth[meta_nf$new_depth=="20-30"] <- "20-30 cm"
meta_nf$new_depth[meta_nf$new_depth=="30-40"] <- "30-40 cm"
meta_nf$new_depth[meta_nf$new_depth=="40-80"] <- "40-80 cm"

13.1.5.Transform RA and z scaled

ps_genus <- aggregate_rare(ps, level = 'genus', detection = 0/100, prevalence = 0/100)
ps_genus
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 943 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 943 taxa by 1 taxonomic ranks ]
# subset
ps_genus_NO_FOREST <- subset_samples(ps_genus, sample_type=="forest")
ps_genus_NO_FOREST
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 943 taxa and 15 samples ]
## sample_data() Sample Data:       [ 15 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 943 taxa by 1 taxonomic ranks ]
meta_nf <- meta(ps_genus_NO_FOREST)

# RA-transform
ps_genus_NO_FOREST_RA <- microbiome::transform(ps_genus_NO_FOREST, 'compositional')

# Z-transform
ps_genus_NO_FOREST_RA_z <- microbiome::transform(ps_genus_NO_FOREST_RA, 'Z', log10 = FALSE)

# subset to only the 30 genera from above

hetamap.taxa <- taxa_names(ps_Heatmap)
data <- as(otu_table(ps_genus_NO_FOREST_RA_z), "matrix")
data <- as.data.frame(data)
data_subset <- data[(row.names(data) %in% hetamap.taxa),]
# now only 30 genuses, as should be
data_subset <- as.matrix(data_subset)

# add annotations "depth" and "soil management"
my_sample_col <- data.frame(meta_nf[c("depth", "sample_type")], row.names = row.names(meta_nf))
colnames(my_sample_col) <- c("depth", "soil management")


x <- as.data.frame(t(data_subset))
y <- as.data.frame(my_sample_col)
colnames(y) <- c("depth", "sample_type")
z <- dplyr::left_join(rownames_to_column(x), rownames_to_column(y), by=c("rowname" = "rowname"))
colnames(z)[1] <- "ID"

f <- z %>%
  group_by(sample_type, depth) %>%
  summarise_all(mean)

# remove ID column
f <- f[ , -3]

# make new ID column, which is the soiltypedepth
library(stringr)
f$ID <- str_c(f$sample_type, '_', f$depth)
# make new df with just sample type, depth and sampletypedepth

df <- f[c("sample_type", "depth", "ID")]
# remove extra columns from f
f2 <- f[, -c(1, 2, 33)]
# column into rownames
rownames(f2) <- f$ID
# make into numeric matrix
f3 <- data.matrix(f2, rownames.force = NA)
f4 <- t(f3)

# same for df
# remove extra columns from df
df2 <- df[ , -c(3)]
# column into rownames
rownames(df2) <- df$ID
my_sample_col2 <- df2
my_sample_col3 <- as.data.frame(my_sample_col2)
rownames(my_sample_col3) <- df$ID
# lets add annotations of samples

# add annotations "depth" and "soil_type" and change order
colnames(my_sample_col3) <- c("soil management", "depth")
my_sample_col3 <- my_sample_col3[, c(2,1)]

13.1.6 Finally plot HEATMAP

library("pheatmap")

# only keep the genus and FUNGuild
x <- FG_tax[, c(9, 3)]
# Remove duplicates by single column
FUNGuild_tax_table <- x[!duplicated(x$genus), ]
dim(FUNGuild_tax_table)
## [1] 943   2
# and genus as row names
rownames(FUNGuild_tax_table) <- NULL
FUNGuild_tax_table <- column_to_rownames(FUNGuild_tax_table, var = "genus")

# view data frame
unique(FUNGuild_tax_table$FUNGuild)
##  [1] NA                                  "Saprotroph-Symbiotroph"           
##  [3] "Arbuscular Mycorrhizal"            "Pathotroph-Saprotroph-Symbiotroph"
##  [5] "Pathotroph-Saprotroph"             "Saprotroph"                       
##  [7] "Endophyte"                         "Ectomycorrhizal"                  
##  [9] "Other Pathotroph"                  "Plant Pathogen"                   
## [11] "Other Symbiotroph"                 "Pathotroph-Symbiotroph"
FUNGuild_tax_table$FUNGuild <- as.factor(FUNGuild_tax_table$FUNGuild)

# change level order
FUNGuild_tax_table$FUNGuild <- factor(FUNGuild_tax_table$FUNGuild, levels = c("Plant Pathogen", "Other Pathotroph", "Pathotroph-Saprotroph", "Pathotroph-Saprotroph-Symbiotroph", "Pathotroph-Symbiotroph", "Saprotroph", "Saprotroph-Symbiotroph", "Other Symbiotroph", "Ectomycorrhizal", "Endophyte", "Arbuscular Mycorrhizal"))
levels(FUNGuild_tax_table$FUNGuild)
##  [1] "Plant Pathogen"                    "Other Pathotroph"                 
##  [3] "Pathotroph-Saprotroph"             "Pathotroph-Saprotroph-Symbiotroph"
##  [5] "Pathotroph-Symbiotroph"            "Saprotroph"                       
##  [7] "Saprotroph-Symbiotroph"            "Other Symbiotroph"                
##  [9] "Ectomycorrhizal"                   "Endophyte"                        
## [11] "Arbuscular Mycorrhizal"
#Create color palette

my_colour = list(
  "soil management" = c(forest = "#1167b1"),
  depth = c('0...10' = "#387212", '10...20' = "#ADC476", '20...30' = "#D8D2BA",'30...40' = "#907852", '40...' = "#6A4C3A"),
  FUNGuild = c("Plant Pathogen" = "deeppink", "Other Pathotroph" = "#d6849a", "Pathotroph-Saprotroph" = "#e3adbc", "Pathotroph-Saprotroph-Symbiotroph" = "#f1d6dd", "Pathotroph-Symbiotroph" = "#faf1f4", "Saprotroph" = "#CBBEAD", "Saprotroph-Symbiotroph" = "darkseagreen", "Other Symbiotroph" = "lightgreen", "Ectomycorrhizal" = "darkgreen", "Endophyte" = "#A2CF31", "Arbuscular Mycorrhizal" = "darkolivegreen1"))

p3 <- as.ggplot(function() pheatmap(f4, cluster_cols = FALSE, cluster_rows = TRUE, annotation_col = my_sample_col3, annotation_colors = my_colour, color=colorRampPalette(c("navy", "white", "red"))(50), show_colnames = FALSE, legend = TRUE, annotation_row = FUNGuild_tax_table, border_color = NA, cellheight = 16, fontsize = 14, fontsize_row = 14, annotation_names_row = FALSE, annotation_names_col = FALSE))

C) COMPOSTION PLOTS

library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)

library(metagMisc)
library(pheatmap)
library(metagMisc)
library(RColorBrewer)
library(viridis)
library(tidyverse)
library(ggpubr)

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
# Pick relative abundances (compositional) and sample metadata 
ps_RA <- microbiome::transform(ps, "compositional")

load(file = 'ps_FG_with_NAs')#ps_FG
ps_FG
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 11 taxonomic ranks ]

1. FUNGuild composition

FG_tax <- ps_FG %>% tax_table() %>% as.data.frame()

unique(FG_tax$trophicMode)
## [1] NA                                  "Saprotroph-Symbiotroph"           
## [3] "Symbiotroph"                       "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Saprotroph"             "Pathotroph"                       
## [7] "Saprotroph"                        "Pathotroph-Symbiotroph"

1.1. separate pure Plant pathogens

Note! In the composition figure:

  • Ectomycorrhizal = guilds containing “Ectomycorrhizal” from trophic mode Symbiotroph only, NOTE! thiis is same as pure Ectomycorrhizal!!
  • Arbuscular Mycorrhizal = all guilds containing “Arbuscular Mycorrhizal” from trophic mode Symbiotroph (nor AMF in other TMs)
  • Endophyte = Pure endophytes from trophic mode Symbiotroph only
  • Plant Pathogen = Pure Plant Pathogens from trophic mode Pathotroph only
z <- FG_tax

z <- z %>%  
   mutate(FG = case_when(grepl("Ectomycorrhizal", guild) & trophicMode=="Symbiotroph" ~ "Ectomycorrhizal", grepl("Arbuscular", guild) ~ "Arbuscular Mycorrhizal", guild == "Endophyte" & trophicMode=="Symbiotroph" ~ "Endophyte", guild=="Plant Pathogen" & trophicMode=="Pathotroph"~ "Plant Pathogen"))
z <- z %>%
  mutate(FUNGuild = case_when(FG == "Ectomycorrhizal" |  FG == "Arbuscular Mycorrhizal" | FG == "Endophyte" | FG == "Plant Pathogen" ~ FG, guild != "Ectomycorrhizal" |  FG != "Arbuscular Mycorrhizal" |  FG != "Endophyte" | FG != "Plant Pathogen" ~ z$trophicMode))
# change some names for FUNGuild
z$FUNGuild[z$FUNGuild=="Symbiotroph"] <- "Other Symbiotroph"
z$FUNGuild[z$FUNGuild=="Pathotroph"] <- "Other Pathotroph"
# remove species and FG
tax <- z[, -c(10, 12)]

unique(tax$FUNGuild)
##  [1] NA                                  "Saprotroph-Symbiotroph"           
##  [3] "Arbuscular Mycorrhizal"            "Pathotroph-Saprotroph-Symbiotroph"
##  [5] "Pathotroph-Saprotroph"             "Other Pathotroph"                 
##  [7] "Saprotroph"                        "Endophyte"                        
##  [9] "Ectomycorrhizal"                   "Pathotroph-Symbiotroph"           
## [11] "Plant Pathogen"                    "Other Symbiotroph"
# rename FUNGuild to species
colnames(tax)[3] <- "species"
tax <- tax %>% as.matrix()

# reassign to phyloseq
x <- ps_FG
tax_table(x) <- tax_table(tax)

x <- aggregate_rare(x, level = "species", detection = 0, prevalence = 0)
x               
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 12 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 12 taxa by 2 taxonomic ranks ]
# 12 taxa and 140 samples

# lets not remove NAs!!

# remove "Unknown"
allTaxa = taxa_names(x)
badTaxa = c("Unknown")
myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
x <- prune_taxa(myTaxa, x)
x
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 11 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 11 taxa by 2 taxonomic ranks ]
# 11 taxa and 140 samples

x_RA <- transform(x, 'compositional')

1.2. Plot

#create data table
df <-  psmelt(x_RA)

sampletype_names <- list(
  'forest' = "forest", 'meadow' = "meadow", 'organic' = "organic", 'conventional' = "conventional"
)

sampletype_labeller <- function(variable,value){
  return(sampletype_names[value])
}

df$species <- factor(df$species)

levels(df$species)
##  [1] "Arbuscular Mycorrhizal"            "Ectomycorrhizal"                  
##  [3] "Endophyte"                         "Other Pathotroph"                 
##  [5] "Other Symbiotroph"                 "Pathotroph-Saprotroph"            
##  [7] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"           
##  [9] "Plant Pathogen"                    "Saprotroph"                       
## [11] "Saprotroph-Symbiotroph"
# change level order

df$species <- factor(df$species, levels = c("Plant Pathogen", "Other Pathotroph", "Pathotroph-Saprotroph", "Pathotroph-Saprotroph-Symbiotroph", "Pathotroph-Symbiotroph", "Saprotroph", "Saprotroph-Symbiotroph", "Other Symbiotroph", "Ectomycorrhizal", "Endophyte", "Arbuscular Mycorrhizal"))
levels(FUNGuild_tax_table$FUNGuild)
##  [1] "Plant Pathogen"                    "Other Pathotroph"                 
##  [3] "Pathotroph-Saprotroph"             "Pathotroph-Saprotroph-Symbiotroph"
##  [5] "Pathotroph-Symbiotroph"            "Saprotroph"                       
##  [7] "Saprotroph-Symbiotroph"            "Other Symbiotroph"                
##  [9] "Ectomycorrhizal"                   "Endophyte"                        
## [11] "Arbuscular Mycorrhizal"
#Create color palette

my_colour = list(
  "soil management" = c(meadow = "#fbc02d", organic = "#8a8a8a", conventional =  "#b71c1c"),
  depth = c('0...10' = "#387212", '10...20' = "#ADC476", '20...30' = "#D8D2BA",'30...40' = "#907852", '40...' = "#6A4C3A"),
  FUNGuild = c("Plant Pathogen" = "deeppink", "Other Pathotroph" = "#d6849a", "Pathotroph-Saprotroph" = "#e3adbc", "Pathotroph-Saprotroph-Symbiotroph" = "#f1d6dd", "Pathotroph-Symbiotroph" = "#faf1f4", "Saprotroph" = "#CBBEAD", "Saprotroph-Symbiotroph" = "darkseagreen", "Other Symbiotroph" = "lightgreen", "Ectomycorrhizal" = "darkgreen", "Endophyte" = "#A2CF31", "Arbuscular Mycorrhizal" = "darkolivegreen1"))


#Create color palette
cbbPalette_reduced <- c("deeppink", "#d6849a", "#e3adbc", "#f1d6dd", "#faf1f4", "#CBBEAD", "darkseagreen","lightgreen", "darkgreen", "#A2CF31", "darkolivegreen1")

# Make new depth variable

df$new_depth <- df$depth

df$new_depth <- gsub("...", "-", df$new_depth, fixed = TRUE)

df$new_depth[df$new_depth=="40-"] <- "40-80"


df$new_depth[df$new_depth=="0-10"] <- "0-10 cm"
df$new_depth[df$new_depth=="10-20"] <- "10-20 cm"
df$new_depth[df$new_depth=="20-30"] <- "20-30 cm"
df$new_depth[df$new_depth=="30-40"] <- "30-40 cm"
df$new_depth[df$new_depth=="40-80"] <- "40-80 cm"

FG  <- ggplot(df, aes(x = new_depth ,y = Abundance, fill = species)) + geom_bar(stat="identity", position="fill") + scale_fill_manual(values = cbbPalette_reduced) + facet_grid (cols = vars(sample_type), labeller=sampletype_labeller) + geom_bar(stat="identity", position="fill") + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=14),
        axis.title = element_text(size=18),
        legend.text = element_text(size=16),
        legend.title = element_text(size=18),
        legend.spacing.y = unit(0, 'cm'),
        legend.key.size = unit(0.8, 'cm'),
        title = element_text(size=18))+ theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill = guide_legend(nrow = 4, title="")) + theme(strip.text.x = element_text(size = 22)) +
  ylab(label = "Relative abundance") + theme(legend.position="top") +  xlab("Depth")

FG

2. Class composition

ps_RA <- microbiome::transform(ps, 'compositional')

ps_RA.class <- aggregate_rare(ps_RA, level = 'class', detection = 3/100, prevalence = 3/140, include.lowest = TRUE)
ps_RA.class
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 16 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 16 taxa by 2 taxonomic ranks ]
#create data table
ps_RA.class_df <-  psmelt(ps_RA.class)

#Create color palette
#set color palette from RColorBrewer
# Define the number of colors you want
library("RColorBrewer") # nice color options
nb.cols = length(unique((as.data.frame(ps_RA.class@tax_table))$class))
cbbPalette <- colorRampPalette(brewer.pal(12, "Set3"))(nb.cols)

sampletype_names <- list(
  'forest' = "forest", 'meadow' = "meadow", 'organic' = "organic", 'conventional' = "conventional"
)

sampletype_labeller <- function(variable,value){
  return(sampletype_names[value])
}

# check unique values for class
unique(ps_RA.class_df$class)
##  [1] "Leotiomycetes"              "Agaricomycetes"            
##  [3] "Dothideomycetes"            "Sordariomycetes"           
##  [5] "Archaeosporomycetes"        "Mortierellomycetes"        
##  [7] "Geoglossomycetes"           "Tremellomycetes"           
##  [9] "Ascomycota_unclassified"    "Glomeromycetes"            
## [11] "Microbotryomycetes"         "Pezizomycetes"             
## [13] "Other"                      "Eurotiomycetes"            
## [15] "Basidiomycota_unclassified" "Orbiliomycetes"
is.factor(ps_RA.class_df$class)
## [1] FALSE
ps_RA.class_df$class <- as.factor(ps_RA.class_df$class)
levels(ps_RA.class_df$class)
##  [1] "Agaricomycetes"             "Archaeosporomycetes"       
##  [3] "Ascomycota_unclassified"    "Basidiomycota_unclassified"
##  [5] "Dothideomycetes"            "Eurotiomycetes"            
##  [7] "Geoglossomycetes"           "Glomeromycetes"            
##  [9] "Leotiomycetes"              "Microbotryomycetes"        
## [11] "Mortierellomycetes"         "Orbiliomycetes"            
## [13] "Other"                      "Pezizomycetes"             
## [15] "Sordariomycetes"            "Tremellomycetes"
ps_RA.class_df$class <- relevel(ps_RA.class_df$class, "Other")

cbbPalette <- c("#b2b2b2", "#8DD3C7", "#FFED6F", "#CAAEC5", "#F68378", "#8D6942", "#F3B962", "#BCD868", "#6E99BE", "#F0D1E1", "#C191C2", "#FFFFC6", "darkgreen", "#D0D9CD", "#8BC081", "#FF8DB5")



# plot with detection = 1/100, prevalence = 2/100
#Create a plot
classF  <- ggplot(ps_RA.class_df, aes(x = depth ,y = Abundance, fill = class)) + geom_bar(stat="identity", position="fill") + scale_fill_manual(values = cbbPalette) + facet_grid (cols = vars(sample_type), labeller=sampletype_labeller) + geom_bar(stat="identity", position="fill") + scale_fill_manual(values = cbbPalette) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=14),
        axis.title = element_text(size=18),
        legend.text = element_text(size=16),
        legend.title = element_text(size=18),
        legend.spacing.y = unit(0, 'cm'),
        legend.key.size = unit(0.8, 'cm'),
        title = element_text(size=18))+ theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill = guide_legend(nrow = 6, title="")) + theme(strip.text.x = element_text(size = 22)) +
  ylab(label = "Relative abundance") + theme(legend.position="top") +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.x = element_blank())

classF

3. Phylum composition

ps_RA_phyla_aggr <- aggregate_rare(ps_RA, level = 'phylum', detection = 2/100, prevalence = 2/140)
ps_RA_phyla_aggr
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 6 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 6 taxa by 2 taxonomic ranks ]
#create data table
ps_RA_phyla_df <-  psmelt(ps_RA_phyla_aggr)

#Create color palette
cbbPalette <- c("#666666","#1B9E77", "#D95F02", "#E7298A", "#7570B3", "#66A61E")

sampletype_names <- list(
  'forest' = "forest", 'meadow' = "meadow", 'organic' = "organic", 'conventional' = "conventional"
)

sampletype_labeller <- function(variable,value){
  return(sampletype_names[value])
}


# check unique values for phylum
unique(ps_RA_phyla_df$phylum)
## [1] "Ascomycota"        "Basidiomycota"     "Glomeromycota"    
## [4] "Mortierellomycota" "Rozellomycota"     "Other"
# [1] "Ascomycota"        "Basidiomycota"     "Glomeromycota"     "Mortierellomycota"
# [5] "Rozellomycota"     "Other"  

ps_RA_phyla_df$phylum <- as.factor(ps_RA_phyla_df$phylum)
levels(ps_RA_phyla_df$phylum)
## [1] "Ascomycota"        "Basidiomycota"     "Glomeromycota"    
## [4] "Mortierellomycota" "Other"             "Rozellomycota"
ps_RA_phyla_df$phylum <- factor(ps_RA_phyla_df$phylum, levels = c("Other","Ascomycota", "Basidiomycota", "Glomeromycota", "Mortierellomycota", "Rozellomycota"))
levels(ps_RA_phyla_df$phylum)
## [1] "Other"             "Ascomycota"        "Basidiomycota"    
## [4] "Glomeromycota"     "Mortierellomycota" "Rozellomycota"
#Create a plot
phylumF  <- ggplot(ps_RA_phyla_df, aes(x = depth ,y = Abundance, fill = phylum)) + geom_bar(stat="identity", position="fill") + scale_fill_manual(values = cbbPalette) + facet_grid (cols = vars(sample_type), labeller=sampletype_labeller) + geom_bar(stat="identity", position="fill") + scale_fill_manual(values = cbbPalette) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=14),
        axis.title = element_text(size=18),
        legend.text = element_text(size=16),
        legend.title = element_text(size=18),
        legend.spacing.y = unit(0, 'cm'),
        legend.key.size = unit(0.8, 'cm'),
        title = element_text(size=18))+ theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill = guide_legend(title="")) + theme(strip.text.x = element_text(size = 22)) +
  ylab(label = "Relative abundance") + theme(legend.position="top") +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.x = element_blank())

phylumF

5. Combine composition plots and heatmap figures

5.1. Composition bar plots

library(ggplotify)


left <- ggarrange(phylumF, classF , FG,
                  labels = c("A", "B", "C"),
                  ncol = 1, nrow = 3, heights = c(1.1, 1.5, 1.5))

left

5.2. Combine Heatmaps

right <- ggarrange(p2, p3,
                       labels = c("D", "E"),
                       ncol = 1, nrow = 2, heights = c(3, 1.3))

right

5.3. Combine composition and heatmap

figure <- ggarrange(left, right,
                    ncol = 2, nrow = 1, heights = c(1, 1), widths = c(1, 1.2))

figure

Saved with width 2400 and height 2600


RESULTS STEPS 7:FUNGuild testing

Note! While doing tests, always check the homogeneity of variance result and decide based on that which later test result to use (Anova and Tukey or Kruskal and Wilcoxon)

library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

load(file = 'ps_FG_with_NAs')#ps_FG
ps_FG
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 11 taxonomic ranks ]

1. Test Plant Pathogen

Note:

# remove species

z <- as.data.frame(tax_table(ps_FG))

tax <- z[, 1:9]

# rename FUNGuild to species
colnames(tax)[3] <- "species"
tax <- tax %>% as.matrix()

# reassign to phyloseq
x <- ps_FG
tax_table(x) <- tax_table(tax)

x <- aggregate_rare(x, level = "species", detection = 0, prevalence = 0)
x               
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 12 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 12 taxa by 2 taxonomic ranks ]
# 12 taxa and 140 samples


# remove "Unknown"
allTaxa = taxa_names(x)
badTaxa = c("Unknown")
myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
x <- prune_taxa(myTaxa, x)
x
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 11 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 11 taxa by 2 taxonomic ranks ]
# 11 taxa and 140 samples

x_RA <- transform(x, 'compositional')
#create data table
FG_df <-  psmelt(x_RA)
unique(FG_df$OTU)
##  [1] "Saprotroph"                        "Ectomycorrhizal"                  
##  [3] "Saprotroph-Symbiotroph"            "Pathotroph-Saprotroph"            
##  [5] "Pathotroph-Symbiotroph"            "Arbuscular Mycorrhizal"           
##  [7] "Endophyte"                         "Pathotroph-Saprotroph-Symbiotroph"
##  [9] "Other Pathotroph"                  "Plant Pathogen"                   
## [11] "Other Symbiotroph"
taxa <- "Plant Pathogen"

# Construct a data.frame with the selected
# taxonomic group and grouping
# relative
df <- filter(FG_df, OTU == taxa)

library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ sample_type, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value   Pr(>F)   
## group   3  5.1088 0.002218 **
##       136                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# first one-way ANOVA

res.aov <- aov(Abundance ~ sample_type, data = df)
# Summary of the analysis
summary(res.aov)
##              Df  Sum Sq  Mean Sq F value   Pr(>F)    
## sample_type   3 0.03572 0.011905   8.387 3.72e-05 ***
## Residuals   136 0.19304 0.001419                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(res.aov)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Abundance ~ sample_type, data = df)
## 
## $sample_type
##                              diff          lwr        upr     p adj
## meadow-forest         0.009502785 -0.020166943 0.03917251 0.8386455
## organic-forest        0.040264954  0.010595226 0.06993468 0.0031516
## conventional-forest   0.038091975  0.008875247 0.06730870 0.0049921
## organic-meadow        0.030762169  0.008849623 0.05267471 0.0020795
## conventional-meadow   0.028589190  0.007294023 0.04988436 0.0035796
## conventional-organic -0.002172979 -0.023468146 0.01912219 0.9934267
# perform the Kruskal test
kruskal.test(Abundance ~ sample_type, data = df)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 32.097, df = 3, p-value = 4.992e-07
pairwise.wilcox.test(df$Abundance, df$sample_type,
                     p.adjust.method = "BH")
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest  meadow  organic
## meadow       0.33468 -       -      
## organic      0.00059 0.00022 -      
## conventional 0.00022 9.5e-05 0.95084
## 
## P value adjustment method: BH
x <- df %>%
  group_by(sample_type) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))

2. Test Endophyte

unique(FG_df$OTU)
##  [1] "Saprotroph"                        "Ectomycorrhizal"                  
##  [3] "Saprotroph-Symbiotroph"            "Pathotroph-Saprotroph"            
##  [5] "Pathotroph-Symbiotroph"            "Arbuscular Mycorrhizal"           
##  [7] "Endophyte"                         "Pathotroph-Saprotroph-Symbiotroph"
##  [9] "Other Pathotroph"                  "Plant Pathogen"                   
## [11] "Other Symbiotroph"
taxa <- "Endophyte"

# Construct a data.frame with the selected
# taxonomic group and grouping
# relative
df <- filter(FG_df, OTU == taxa)

# Using leveneTest()
result = leveneTest(Abundance ~ sample_type, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  0.9477 0.4196
##       136
# first one-way ANOVA

res.aov <- aov(Abundance ~ sample_type, data = df)
# Summary of the analysis
summary(res.aov)
##              Df Sum Sq  Mean Sq F value Pr(>F)
## sample_type   3 0.0286 0.009522   1.621  0.187
## Residuals   136 0.7988 0.005874
TukeyHSD(res.aov)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Abundance ~ sample_type, data = df)
## 
## $sample_type
##                              diff         lwr        upr     p adj
## meadow-forest         0.004251549 -0.05610473 0.06460783 0.9978070
## organic-forest        0.036929219 -0.02342706 0.09728550 0.3868926
## conventional-forest   0.025133837 -0.03430092 0.08456859 0.6901727
## organic-meadow        0.032677670 -0.01189840 0.07725374 0.2300327
## conventional-meadow   0.020882288 -0.02243786 0.06420244 0.5938191
## conventional-organic -0.011795382 -0.05511553 0.03152477 0.8936888
# perform the Kruskal test
kruskal.test(Abundance ~ sample_type, data = df)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 17.962, df = 3, p-value = 0.0004478
pairwise.wilcox.test(df$Abundance, df$sample_type,
                     p.adjust.method = "BH")
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest meadow organic
## meadow       0.6097 -      -      
## organic      0.0129 0.0032 -      
## conventional 0.0269 0.0073 0.9088 
## 
## P value adjustment method: BH
x <- df %>%
  group_by(sample_type) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))

3. Test Trophic modes

# remove species

z <- as.data.frame(tax_table(ps_FG))

tax <- z[, 1:9]

# rename trophicmode to species
colnames(tax)[1] <- "species"
tax <- tax %>% as.matrix()

# reassign to phyloseq
x <- ps_FG
tax_table(x) <- tax_table(tax)

x <- aggregate_rare(x, level = "species", detection = 0, prevalence = 0)
x               
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 8 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 8 taxa by 1 taxonomic ranks ]
# 8 taxa and 140 samples


# remove "Unknown"
allTaxa = taxa_names(x)
badTaxa = c("Unknown")
myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
x <- prune_taxa(myTaxa, x)
x
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 7 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 7 taxa by 1 taxonomic ranks ]
# 7 taxa and 140 samples

x_RA <- transform(x, 'compositional')
#create data table
FG_df <-  psmelt(x_RA)
unique(FG_df$species)
## [1] "Saprotroph"                        "Symbiotroph"                      
## [3] "Saprotroph-Symbiotroph"            "Pathotroph-Saprotroph"            
## [5] "Pathotroph-Symbiotroph"            "Pathotroph"                       
## [7] "Pathotroph-Saprotroph-Symbiotroph"
FG_df$species <- as.factor(FG_df$species)

for (i in levels(FG_df$species)) {
  df <- filter(FG_df, species == i)

  print(i)
  result = leveneTest(Abundance ~ sample_type, df)
  print(result)
  k <- kruskal.test(Abundance ~ sample_type, data = df)
  print(k)
  w <- pairwise.wilcox.test(df$Abundance, df$sample_type,
                     p.adjust.method = "BH")
  print(w)
  res.aov <- aov(Abundance ~ sample_type, data = df)
  aov <- summary(res.aov)
  print(aov)
  tukey <- TukeyHSD(res.aov)
  print(tukey)
  }
## [1] "Pathotroph"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   3   8.875 2.058e-05 ***
##       136                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 29.339, df = 3, p-value = 1.901e-06
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest  meadow  organic
## meadow       0.08892 -       -      
## organic      0.00127 0.00127 -      
## conventional 0.00066 0.00015 0.17096
## 
## P value adjustment method: BH 
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## sample_type   3 0.2269 0.07562   11.46 9.54e-07 ***
## Residuals   136 0.8974 0.00660                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Abundance ~ sample_type, data = df)
## 
## $sample_type
##                            diff         lwr        upr     p adj
## meadow-forest        0.01583120 -0.04814065 0.07980305 0.9175818
## organic-forest       0.08324801  0.01927616 0.14721985 0.0050968
## conventional-forest  0.10034692  0.03735180 0.16334205 0.0003455
## organic-meadow       0.06741681  0.02017046 0.11466315 0.0016868
## conventional-meadow  0.08451572  0.03860053 0.13043091 0.0000256
## conventional-organic 0.01709892 -0.02881627 0.06301411 0.7674770
## 
## [1] "Pathotroph-Saprotroph"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  0.4128 0.7441
##       136               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 20.866, df = 3, p-value = 0.0001122
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest meadow organic
## meadow       0.620  -      -      
## organic      0.473  0.092  -      
## conventional 0.022  4e-05  0.025  
## 
## P value adjustment method: BH 
##              Df Sum Sq Mean Sq F value Pr(>F)  
## sample_type   3  0.166 0.05530   2.305 0.0797 .
## Residuals   136  3.263 0.02399                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Abundance ~ sample_type, data = df)
## 
## $sample_type
##                             diff          lwr       upr     p adj
## meadow-forest        -0.01745120 -0.139437290 0.1045349 0.9823482
## organic-forest        0.02945479 -0.092531299 0.1514409 0.9228783
## conventional-forest   0.06811418 -0.052009412 0.1882378 0.4554546
## organic-meadow        0.04690599 -0.043186704 0.1369987 0.5302224
## conventional-meadow   0.08556539 -0.001988977 0.1731197 0.0580409
## conventional-organic  0.03865939 -0.048894968 0.1262138 0.6601975
## 
## [1] "Pathotroph-Saprotroph-Symbiotroph"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   3  11.314 1.131e-06 ***
##       136                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 16.058, df = 3, p-value = 0.001104
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest meadow organic
## meadow       0.120  -      -      
## organic      0.267  0.012  -      
## conventional 0.483  0.001  0.177  
## 
## P value adjustment method: BH 
##              Df Sum Sq  Mean Sq F value   Pr(>F)    
## sample_type   3 0.0909 0.030312   7.509 0.000109 ***
## Residuals   136 0.5490 0.004037                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Abundance ~ sample_type, data = df)
## 
## $sample_type
##                             diff          lwr          upr     p adj
## meadow-forest        -0.04266408 -0.092698462  0.007370300 0.1235555
## organic-forest        0.02369330 -0.026341081  0.073727681 0.6078475
## conventional-forest  -0.01552221 -0.064792665  0.033748240 0.8451907
## organic-meadow        0.06635738  0.029404542  0.103310221 0.0000419
## conventional-meadow   0.02714187 -0.008769836  0.063053574 0.2060867
## conventional-organic -0.03921551 -0.075127218 -0.003303808 0.0264115
## 
## [1] "Pathotroph-Symbiotroph"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  0.7863 0.5035
##       136               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 9.1683, df = 3, p-value = 0.02713
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest meadow organic
## meadow       0.135  -      -      
## organic      0.931  0.133  -      
## conventional 0.750  0.033  0.454  
## 
## P value adjustment method: BH 
##              Df Sum Sq  Mean Sq F value Pr(>F)
## sample_type   3 0.0105 0.003513   0.789  0.502
## Residuals   136 0.6056 0.004453               
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Abundance ~ sample_type, data = df)
## 
## $sample_type
##                               diff         lwr        upr     p adj
## meadow-forest         0.0148900730 -0.03766324 0.06744339 0.8820199
## organic-forest        0.0202069102 -0.03234641 0.07276023 0.7495680
## conventional-forest   0.0006628179 -0.05108811 0.05241375 0.9999867
## organic-meadow        0.0053168372 -0.03349636 0.04413003 0.9844347
## conventional-meadow  -0.0142272551 -0.05194690 0.02349239 0.7604539
## conventional-organic -0.0195440923 -0.05726374 0.01817556 0.5343221
## 
## [1] "Saprotroph"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  1.4939  0.219
##       136               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 29.348, df = 3, p-value = 1.893e-06
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest  meadow  organic
## meadow       0.00015 -       -      
## organic      0.16698 1.7e-05 -      
## conventional 0.04563 0.00027 0.35940
## 
## P value adjustment method: BH 
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## sample_type   3  2.056  0.6855   13.22 1.26e-07 ***
## Residuals   136  7.052  0.0519                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Abundance ~ sample_type, data = df)
## 
## $sample_type
##                             diff         lwr         upr     p adj
## meadow-forest         0.34542965  0.16610621  0.52475309 0.0000098
## organic-forest        0.07736653 -0.10195691  0.25668997 0.6765045
## conventional-forest   0.13565316 -0.04093236  0.31223868 0.1937636
## organic-meadow       -0.26806312 -0.40050226 -0.13562398 0.0000032
## conventional-meadow  -0.20977649 -0.33848420 -0.08106878 0.0002385
## conventional-organic  0.05828663 -0.07042108  0.18699433 0.6417935
## 
## [1] "Saprotroph-Symbiotroph"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  0.4218 0.7376
##       136               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 0.97889, df = 3, p-value = 0.8064
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest meadow organic
## meadow       0.94   -      -      
## organic      0.94   0.94   -      
## conventional 0.94   0.94   0.94   
## 
## P value adjustment method: BH 
##              Df Sum Sq Mean Sq F value Pr(>F)
## sample_type   3  0.042 0.01398   0.381  0.767
## Residuals   136  4.984 0.03665               
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Abundance ~ sample_type, data = df)
## 
## $sample_type
##                              diff         lwr        upr     p adj
## meadow-forest        -0.040917266 -0.19167857 0.10984404 0.8945909
## organic-forest        0.001318962 -0.14944234 0.15208027 0.9999957
## conventional-forest  -0.010145065 -0.15860454 0.13831440 0.9979963
## organic-meadow        0.042236228 -0.06910837 0.15358083 0.7572807
## conventional-meadow   0.030772200 -0.07743530 0.13897970 0.8808802
## conventional-organic -0.011464028 -0.11967153 0.09674348 0.9926584
## 
## [1] "Symbiotroph"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   3  10.346 3.531e-06 ***
##       136                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 15.613, df = 3, p-value = 0.001361
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest  meadow  organic
## meadow       0.00084 -       -      
## organic      0.00277 0.27073 -      
## conventional 0.00158 0.70169 0.70169
## 
## P value adjustment method: BH 
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## sample_type   3  0.980  0.3268   11.88 5.87e-07 ***
## Residuals   136  3.741  0.0275                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Abundance ~ sample_type, data = df)
## 
## $sample_type
##                             diff         lwr         upr     p adj
## meadow-forest        -0.27511837 -0.40573532 -0.14450143 0.0000012
## organic-forest       -0.23528850 -0.36590544 -0.10467155 0.0000394
## conventional-forest  -0.27910980 -0.40773248 -0.15048713 0.0000006
## organic-meadow        0.03982988 -0.05663713  0.13629688 0.7059692
## conventional-meadow  -0.00399143 -0.09774051  0.08975765 0.9995118
## conventional-organic -0.04382131 -0.13757038  0.04992777 0.6179047
x <- FG_df %>% group_by(species, sample_type) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
x
## # A tibble: 28 × 4
## # Groups:   species [7]
##    species                           sample_type    mean      se
##    <fct>                             <fct>         <dbl>   <dbl>
##  1 Pathotroph                        forest       0.0180 0.00720
##  2 Pathotroph                        meadow       0.0338 0.00750
##  3 Pathotroph                        organic      0.101  0.0170 
##  4 Pathotroph                        conventional 0.118  0.0133 
##  5 Pathotroph-Saprotroph             forest       0.106  0.0351 
##  6 Pathotroph-Saprotroph             meadow       0.0889 0.0196 
##  7 Pathotroph-Saprotroph             organic      0.136  0.0265 
##  8 Pathotroph-Saprotroph             conventional 0.174  0.0257 
##  9 Pathotroph-Saprotroph-Symbiotroph forest       0.0557 0.0304 
## 10 Pathotroph-Saprotroph-Symbiotroph meadow       0.0131 0.00319
## # ℹ 18 more rows

4. Trophic modes in forest deepest layer

ps_x <- subset_samples(ps_FG, sample_type=="forest" & depth=="40...")
ps_x
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 3 samples ]
## sample_data() Sample Data:       [ 3 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 11 taxonomic ranks ]
ps_FG_Tm <- aggregate_rare(ps_x, level = "trophicMode", detection = 0, prevalence = 0)
ps_FG_Tm
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 8 taxa and 3 samples ]
## sample_data() Sample Data:       [ 3 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 8 taxa by 1 taxonomic ranks ]
# 8 taxa and 15 samples

# remove "Unknown"
allTaxa = taxa_names(ps_FG_Tm)
badTaxa = c("Unknown")
myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
ps_FG_Tm <- prune_taxa(myTaxa, ps_FG_Tm)
ps_FG_Tm
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 7 taxa and 3 samples ]
## sample_data() Sample Data:       [ 3 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 7 taxa by 1 taxonomic ranks ]
# 7 taxa and 15 samples

ps_FG_Tm_RA <- microbiome::transform(ps_FG_Tm, "compositional")

FG_df <- psmelt(ps_FG_Tm_RA)

x <- FG_df %>%
  group_by(OTU) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))

x
## # A tibble: 7 × 3
##   OTU                                  mean      se
##   <chr>                               <dbl>   <dbl>
## 1 Pathotroph                        0.00516 0.00516
## 2 Pathotroph-Saprotroph             0.181   0.154  
## 3 Pathotroph-Saprotroph-Symbiotroph 0.0185  0.0185 
## 4 Pathotroph-Symbiotroph            0       0      
## 5 Saprotroph                        0.0193  0.0190 
## 6 Saprotroph-Symbiotroph            0.0274  0.0272 
## 7 Symbiotroph                       0.748   0.141

RESULTS STEPS 8: Spearman correlations of depth with 5 most abundant phyla, classes, genera and trophic mode

Note all correlations are done without forest!

library("ggpubr")
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
load(file = 'ps_FG_with_NAs')#ps_FG
ps_FG
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 11 taxonomic ranks ]

Spearman rank correlation with depth without forest

1. Trophic modes

1.1. Remove forest and NAs

# remove NAs from ps_FG

# this aggregation makes NAs into "Unknown"
ps_FG_x <- aggregate_rare(ps_FG, level = 'trophicMode', detection = 0/100, prevalence = 0/140)
ps_FG_x
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 8 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 8 taxa by 1 taxonomic ranks ]
# 8 taxa

# remove "Unknown"
allTaxa = taxa_names(ps_FG_x)
badTaxa = c("Unknown")
myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
ps_FG_x_pruned <- prune_taxa(myTaxa, ps_FG_x)
ps_FG_x_pruned
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 7 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 7 taxa by 1 taxonomic ranks ]
# 7 taxa

FG_nf <- subset_samples(ps_FG_x_pruned, sample_type != "forest")
FG_nf
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 7 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 7 taxa by 1 taxonomic ranks ]
FG_RA_nf <- transform(FG_nf, "compositional")
FG_RA_nf
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 7 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 7 taxa by 1 taxonomic ranks ]
df <- psmelt(FG_RA_nf)

1.2. Five most abundant Trophic modes

df$year <- "2019"

# sum the values if same genus: Sum by Group Based on dplyr Package

x <- df %>%
  group_by(year, OTU) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))

y <- x %>%
    arrange(desc(mean)) %>%
    slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups:   year [1]
##   year  OTU                      mean      se
##   <chr> <chr>                   <dbl>   <dbl>
## 1 2019  Saprotroph             0.422  0.0227 
## 2 2019  Saprotroph-Symbiotroph 0.216  0.0168 
## 3 2019  Pathotroph-Saprotroph  0.135  0.0143 
## 4 2019  Pathotroph             0.0858 0.00824
## 5 2019  Symbiotroph            0.0855 0.0120

1.3. Test for each Trophic mode separately

taxa <- y$OTU

for (i in taxa) {
 df_x <- subset(df, OTU == i)
 print(i)
 x <- cor.test(df_x$Abundance, df_x$depth_numerical,  method = "spearman", na.rm = TRUE)
 print(x)
}
## [1] "Saprotroph"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 435672, p-value = 0.000113
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.3384715 
## 
## [1] "Saprotroph-Symbiotroph"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 312947, p-value = 0.6694
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##      rho 
## 0.038566 
## 
## [1] "Pathotroph-Saprotroph"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 276512, p-value = 0.09387
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.1505015 
## 
## [1] "Pathotroph"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 525434, p-value = 2.576e-14
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.6142379 
## 
## [1] "Symbiotroph"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 223897, p-value = 0.0003944
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.3121434

2. Phyla

2.1. Remove forest

ps_nf <- subset_samples(ps, sample_type != "forest")
ps_nf
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
ps_RA_nf <- transform(ps_nf, "compositional")
ps_RA_nf
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
ps_RA_nf_phy <- aggregate_rare(ps_RA_nf, level = "phylum", detection = 0.0, prevalence = 0.0)
ps_RA_nf_phy
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 14 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 14 taxa by 1 taxonomic ranks ]
df <- psmelt(ps_RA_nf_phy)

2.2. Five most abundant phyla

df$year <- "2019"

x <- df %>%
  group_by(year, OTU) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
    arrange(desc(mean)) %>%
    slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups:   year [1]
##   year  OTU                  mean       se
##   <chr> <chr>               <dbl>    <dbl>
## 1 2019  Ascomycota        0.692   0.0151  
## 2 2019  Basidiomycota     0.199   0.0134  
## 3 2019  Mortierellomycota 0.0793  0.00956 
## 4 2019  Glomeromycota     0.0264  0.00683 
## 5 2019  Chytridiomycota   0.00209 0.000362

2.3. Test for each phyla separately

taxa <- y$OTU

for (i in taxa) {
  df_x <- subset(df, OTU == i)
  print(i)
  x <- cor.test(df_x$Abundance, df_x$depth_numerical,  method = "spearman", na.rm = TRUE)
  print(x)
  }
## [1] "Ascomycota"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 298097, p-value = 0.3506
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## 0.08418676 
## 
## [1] "Basidiomycota"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 491192, p-value = 1.347e-09
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.5090399 
## 
## [1] "Mortierellomycota"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 245027, p-value = 0.005442
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##     rho 
## 0.24723 
## 
## [1] "Glomeromycota"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 257422, p-value = 0.01924
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.2091478 
## 
## [1] "Chytridiomycota"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 569974, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.7510735

3. Classes

3.1. Remove forest

ps_nf <- subset_samples(ps, sample_type != "forest")
ps_nf
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
ps_RA_nf <- transform(ps_nf, "compositional")
ps_RA_nf
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
ps_RA_nf_cla <- aggregate_rare(ps_RA_nf, level = "class", detection = 0.0, prevalence = 0.0)
ps_RA_nf_cla
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 65 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 65 taxa by 2 taxonomic ranks ]
df <- psmelt(ps_RA_nf_cla)

3.2. Five most abundant phyla

df$year <- "2019"

x <- df %>%
  group_by(year, OTU) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
    arrange(desc(mean)) %>%
    slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups:   year [1]
##   year  OTU                  mean      se
##   <chr> <chr>               <dbl>   <dbl>
## 1 2019  Leotiomycetes      0.291  0.0218 
## 2 2019  Sordariomycetes    0.185  0.0131 
## 3 2019  Dothideomycetes    0.137  0.0140 
## 4 2019  Tremellomycetes    0.132  0.0111 
## 5 2019  Mortierellomycetes 0.0793 0.00956

3.3. Test for each class separately

taxa <- y$OTU

for (i in taxa) {
  df_x <- subset(df, OTU == i)
  print(i)
  x <- cor.test(df_x$Abundance, df_x$depth_numerical,  method = "spearman", na.rm = TRUE)
  print(x)
}
## [1] "Leotiomycetes"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 139957, p-value = 3.966e-12
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.5700243 
## 
## [1] "Sordariomycetes"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 426742, p-value = 0.0004147
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.3110364 
## 
## [1] "Dothideomycetes"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 548039, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.6836843 
## 
## [1] "Tremellomycetes"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 558143, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.7147252 
## 
## [1] "Mortierellomycetes"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 245027, p-value = 0.005442
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##     rho 
## 0.24723

4. Genera

4.1. Remove forest

ps_nf <- subset_samples(ps, sample_type != "forest")
ps_nf
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
ps_RA_nf <- transform(ps_nf, "compositional")
ps_RA_nf
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
ps_RA_nf_gen <- aggregate_rare(ps_RA_nf, level = "genus", detection = 0.0, prevalence = 0.0)
ps_RA_nf_gen
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 895 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 895 taxa by 2 taxonomic ranks ]
df <- psmelt(ps_RA_nf_gen)

4.2. Five most abundant genera

Note! Exclude the obes not classified at genus level!

df$year <- "2019"

x <- df %>%
  group_by(year, OTU) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
    arrange(desc(mean)) %>%
    slice(1:6)
print(y)
## # A tibble: 6 × 4
## # Groups:   year [1]
##   year  OTU                          mean      se
##   <chr> <chr>                       <dbl>   <dbl>
## 1 2019  Leotiomycetes_unclassified 0.109  0.0168 
## 2 2019  Saitozyma                  0.0664 0.00702
## 3 2019  Pseudeurotium              0.0533 0.0122 
## 4 2019  Paraphaeosphaeria          0.0516 0.0110 
## 5 2019  Mortierella                0.0478 0.00662
## 6 2019  Solicoccozyma              0.0463 0.00584

4.3. Test for each genera separately

We don’t want to test for Leotiomycetes_unclassified

taxa <- y$OTU
taxa[! taxa %in% c("Leotiomycetes_unclassified")]
## [1] "Saitozyma"         "Pseudeurotium"     "Paraphaeosphaeria"
## [4] "Mortierella"       "Solicoccozyma"
for (i in taxa) {
  df_x <- subset(df, OTU == i)
  print(i)
  x <- cor.test(df_x$Abundance, df_x$depth_numerical,  method = "spearman", na.rm = TRUE)
  print(x)
}
## [1] "Leotiomycetes_unclassified"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 186445, p-value = 6.745e-07
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.4272047 
## 
## [1] "Saitozyma"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 549794, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.6890738 
## 
## [1] "Pseudeurotium"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 329787, p-value = 0.8841
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##         rho 
## -0.01316959 
## 
## [1] "Paraphaeosphaeria"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 565995, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.7388476 
## 
## [1] "Mortierella"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 239975, p-value = 0.003073
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.2627505 
## 
## [1] "Solicoccozyma"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 545184, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.6749137

RESULTS STEP 9: Calculate the average number of reads and OTUs and the total number of OTUs in each soil layer

library('phyloseq')
library("dplyr")
library("tibble")
library("microbiome")
library("tibble")

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
meta <- meta(ps)

OTU <- as.data.frame(otu_table(ps))
OTU <- t(OTU)

# calculate reads per sample:
OTU2 <- OTU
OTU2 <- as.data.frame(OTU2)
OTU2$reads <- rowSums(OTU2)

# add to meta

meta$reads <- OTU2$reads
rm(OTU2)

OTU <- as.matrix(OTU)

#convert an abundance matrix to a presence-absence matrix. (make any number greater than 0 into a 1)
OTU[OTU > 0] <- 1 #converts from abundance to P/A
OTU <- as.data.frame(OTU)
# calculate number of different otus in a sample
OTU$OTUs <- rowSums(OTU)

# add to meta
meta$OTUs <- OTU$OTUs
rm(OTU)

# save new meta
sample_data(ps) <- sample_data(meta)
save(ps, file = 'ps_FINAL')

mean(meta$OTUs)
## [1] 1480.243
# 1480.243

# mean values of OTUs in soil layers

x <- meta %>%
  dplyr::group_by(depth) %>%
  dplyr::summarise(OTUs_mean = mean(OTUs, na.rm = TRUE), OTUs_se = (sd(OTUs, na.rm = TRUE)/sqrt(length((OTUs)))))
print(paste("how many OTUs on average in each soil layer"))
## [1] "how many OTUs on average in each soil layer"
print(x)
## # A tibble: 5 × 3
##   depth   OTUs_mean OTUs_se
##   <chr>       <dbl>   <dbl>
## 1 0...10      2184.   183. 
## 2 10...20     2495.   134. 
## 3 20...30     1900.   187. 
## 4 30...40      607.    86.0
## 5 40...        215.    39.9
y <- meta %>%
  dplyr::group_by(depth) %>%
  dplyr::summarise(reads_mean = mean(reads, na.rm = TRUE), reads_se = (sd(reads, na.rm = TRUE)/sqrt(length((reads)))))
print(paste("how many reads on average in each soil layer"))
## [1] "how many reads on average in each soil layer"
print(y)
## # A tibble: 5 × 3
##   depth   reads_mean reads_se
##   <chr>        <dbl>    <dbl>
## 1 0...10      80957.    4871.
## 2 10...20     95172.    6279.
## 3 20...30     97826.    8155.
## 4 30...40    102635.   15860.
## 5 40...       35534.    8482.
xy <- left_join(x, y, by="depth")

# save

write.csv2(xy, file = "OTUs_and_reads_in_depths.csv", row.names = FALSE)

library("metagMisc")

ps_x <- phyloseq_average(
  ps,
  avg_type = "arithmetic",
  group = "depth",
  drop_group_zero = FALSE,
  verbose = FALSE,
  progress = NULL
)

ps_x
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 5 samples ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
# 20610 taxa and 5 samples

OTU <- as.data.frame(otu_table(ps_x))
OTU <- t(OTU)


OTU <- as.matrix(OTU)

#convert an abundance matrix to a presence-absence matrix. (make any number greater than 0 into a 1)
OTU[OTU > 0] <- 1 #converts from abundance to P/A
OTU <- as.data.frame(OTU)
# calculate number of different otus in a sample
OTU$OTUs <- rowSums(OTU)

OTU$depth <- rownames(OTU)

OTU <- OTU[, 20611:20612]
print(paste("how many OTUs in total in each soil layer"))
## [1] "how many OTUs in total in each soil layer"
print(OTU)
##          OTUs   depth
## 0...10  14737  0...10
## 10...20 16268 10...20
## 20...30 14763 20...30
## 30...40  5367 30...40
## 40...    2563   40...

Test OTU richness between layers

library(car)
# Using leveneTest()
result = leveneTest(OTUs ~ depth, meta)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   4  16.055 8.907e-11 ***
##       135                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# not homogenous


# perform the Kruskal test
kruskal.test(OTUs ~ depth, data = meta)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  OTUs by depth
## Kruskal-Wallis chi-squared = 91.593, df = 4, p-value < 2.2e-16
pairwise.wilcox.test(meta$OTUs, meta$depth,
                     p.adjust.method = "BH")
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  meta$OTUs and meta$depth 
## 
##         0...10  10...20 20...30 30...40
## 10...20 0.30647 -       -       -      
## 20...30 0.33226 0.02782 -       -      
## 30...40 2.4e-09 8.9e-10 4.1e-07 -      
## 40...   8.9e-10 8.9e-10 1.7e-09 0.00065
## 
## P value adjustment method: BH

RESULTS STEP 10: Richness figures and statistical testing

library("ggpubr")
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)


setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

load(file = 'ps_FG_with_NAs')#ps_FG
ps_FG
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 11 taxonomic ranks ]
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 31 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
meta <- meta(ps)

1. Calculate FUNGuild richnesses

1.1. Saprotroph

x_sub <- subset_taxa(ps_FG, trophicMode %in% c("Saprotroph"))
x_sub
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 4842 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 4842 taxa by 11 taxonomic ranks ]
# 4842 taxa and 140 samples

# Save output of `plot_richness` as a variable
richness <- plot_richness(x_sub, x = "sample_type", measures = c("Observed"), color = "depth")

# get the data into data frame
richness_df <- richness$data

# the observed richness is in the column "value", lets change this to saprotroph_richness
colnames(richness_df)[31] <- "saprotroph_richness"
# and lets remove the "variable" and "se" columns
richness_df <- subset(richness_df, select = -variable)
richness_df <- subset(richness_df, select = -se)

FG_richness <- richness_df

1.2. Symbiotroph

x_sub <- subset_taxa(ps_FG, trophicMode %in% c("Symbiotroph"))
x_sub
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 784 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 784 taxa by 11 taxonomic ranks ]
# 784 taxa and 140 samples



# Save output of `plot_richness` as a variable
richness <- plot_richness(x_sub, x = "sample_type", measures = c("Observed"), color = "depth")

# get the data
richness_df <- richness$data

# the observed richness is in the column "value", lets change this to symbiotroph_richness
colnames(richness_df)[31] <- "symbiotroph_richness"
# and lets remove extra columns
richness_df <- richness_df[, c(1, 31)]

# combine
FG_richness <- dplyr::left_join(FG_richness, richness_df, by = "sampleID")
FG_richness <- subset(FG_richness, select = -samples)

# sampleID into rownames
rownames(FG_richness) <- FG_richness[,1]

1.3. Pathotroph

x_sub <- subset_taxa(ps_FG, trophicMode %in% c("Pathotroph"))
x_sub
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 1500 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 1500 taxa by 11 taxonomic ranks ]
# 1500 taxa and 140 samples



# Save output of `plot_richness` as a variable
richness <- plot_richness(x_sub, x = "sample_type", measures = c("Observed"), color = "depth")

# get the data
richness_df <- richness$data

# the observed richness is in the column "value", lets change this to pathotroph_richness
colnames(richness_df)[31] <- "pathotroph_richness"
# and lets remove extra columns
richness_df <- richness_df[, c(1, 31)]

# combine
FG_richness <- dplyr::left_join(FG_richness, richness_df, by = "sampleID")


# sampleID into rownames
rownames(FG_richness) <- FG_richness[,1]

2. Calculate AMF (Glomeromycota) Richness

Note! AMF richness calculated from FUNGuild is exactly the same (not shown here)!

x_sub <- subset_taxa(ps, phylum %in% c("Glomeromycota"))
x_sub
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 263 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 31 sample variables ]
## tax_table()   Taxonomy Table:    [ 263 taxa by 7 taxonomic ranks ]
# 263 taxa and 140 samples



# Save output of `plot_richness` as a variable
richness <- plot_richness(x_sub, x = "sample_type", measures = c("Observed"), color = "depth")

# get the data
richness_df <- richness$data

# the observed richness is in the column "value", lets change this to AMF_richness
colnames(richness_df)[34] <- "AMF_richness"
# and lets remove extra columns
richness_df <- richness_df[, c(1, 34)]

# combine
FG_richness <- dplyr::left_join(FG_richness, richness_df, by = "sampleID")

# sampleID into rownames
rownames(FG_richness) <- FG_richness[,1]

# I actually want the meta data from the ps rather than the ps_FG, so leyt's remove extra columns

FG_richness <- FG_richness[, c(1, 29:32)]

meta <- dplyr::left_join(meta, FG_richness, by = "sampleID")

# sampleID into rownames
rownames(meta) <- meta[,1]

3. Save new richness measures in meta data

sample_data(ps) <- sample_data(meta)

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

save(ps, file = 'ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 35 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]

4. Richness plots

4.1. Fungal richness

# create your own color palette for sample types
MyPalette <- c(forest = "#1167b1", meadow = "#fbc02d", organic = "#8a8a8a", conventional =  "#b71c1c")

# OTU richness
OTU_rich <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(observed, na.rm = TRUE), se = (sd(observed, na.rm = TRUE)/sqrt(length((observed))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=3, position=position_dodge(1.2)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text = element_text(size=16),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="Fungal richness") +
  labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
print(OTU_rich)

Change to thousands of reads (x1000)

rich_k <- meta %>% dplyr::mutate(richness_k = observed/1000 ) %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(richness_k, na.rm = TRUE), se = (sd(richness_k, na.rm = TRUE)/sqrt(length((richness_k))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=3, position=position_dodge(1.2)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text = element_text(size=16),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="Fungal richness \n (x1000)") +
  labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)

rich_k

4.2. AMF richness

gm_rich <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(AMF_richness, na.rm = TRUE), se = (sd(AMF_richness, na.rm = TRUE)/sqrt(length((AMF_richness))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=3, position=position_dodge(1.2)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text = element_text(size=16),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="AMF richness \n") +
  labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)

gm_rich

4.3. Saprotroph richness

Saprotroph_rich <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(saprotroph_richness, na.rm = TRUE), se = (sd(saprotroph_richness, na.rm = TRUE)/sqrt(length((saprotroph_richness))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=3, position=position_dodge(1.2)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text = element_text(size=16),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="Saprotroph richness \n") +
  labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)


Saprotroph_rich

4.4. Symbiotroph richness

symb_rich <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(symbiotroph_richness, na.rm = TRUE), se = (sd(symbiotroph_richness, na.rm = TRUE)/sqrt(length((symbiotroph_richness))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=3, position=position_dodge(1.2)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text = element_text(size=16),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="Symbiotroph richness \n") +
  labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)


symb_rich

4.5. Pathotroph richness

path_rich <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(pathotroph_richness, na.rm = TRUE), se = (sd(pathotroph_richness, na.rm = TRUE)/sqrt(length((pathotroph_richness))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=3, position=position_dodge(1.2)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text = element_text(size=16),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="Pathotroph richness \n") +
  labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)


path_rich

5. Combine selected figures

figure <- ggarrange(rich_k, gm_rich + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"), Saprotroph_rich + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"), path_rich + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"),
                    labels = c("A", "B", "C", "D"),
                    ncol = 4, nrow = 1, common.legend = TRUE, legend = "right")

figure

6. Test richnes between management types in all soil layers

library("multcomp")

Change depth and richness measure accordingly

Test these: - observed - AMF_richness - saprotroph_richness - pathotroph_richness

library(car)

means_and_ses <- list()

meta$depth <- as.factor(meta$depth)
rich <-c("observed", "AMF_richness", "saprotroph_richness", "pathotroph_richness")

for (i in rich) {
  for (j in levels(meta$depth)) {
    df <- filter(meta, depth==j)
    
     print(i)
     print(j)
     # Levene's test for homogeneity of variance
    result <- leveneTest(df[[i]] ~ sample_type, data = df)
    print(result)
     
    # Kruskal-Wallis test
    k <- kruskal.test(df[[i]] ~ sample_type, data = df)
    print(k)
    
    # Pairwise Wilcoxon test
    w <- pairwise.wilcox.test(df[[i]], df$sample_type, p.adjust.method = "BH")
    print(w)
    
     # ANOVA
    res.aov <- aov(df[[i]] ~ sample_type, data = df)
    aov_summary <- summary(res.aov)
    print(aov_summary)
    
    # Tukey's HSD test
    tukey <- TukeyHSD(res.aov)
    print(tukey)
    
    # Group by and summarize
    mean_and_se <- df %>%
      group_by(sample_type) %>%
      summarise(mean = mean(!!sym(i), na.rm = TRUE), se = sd(!!sym(i), na.rm = TRUE) / sqrt(n()))
  
    # Store the result in the list with a descriptive name
    result_name <- paste("depth", j, "diversity", i, sep = "_")
    means_and_ses[[result_name]] <- mean_and_se
    
  }
}
## [1] "observed"
## [1] "0...10"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  1.0589 0.3849
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 17.971, df = 3, p-value = 0.0004458
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest  meadow  organic
## meadow       0.59636 -       -      
## organic      0.03636 0.00093 -      
## conventional 0.03636 0.00093 0.67297
## 
## P value adjustment method: BH 
##             Df   Sum Sq Mean Sq F value   Pr(>F)    
## sample_type  3 17009022 5669674   16.24 5.61e-06 ***
## Residuals   24  8379531  349147                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                           diff        lwr       upr     p adj
## meadow-forest        -345.2917 -1448.8240  758.2407 0.8235682
## organic-forest       1417.2083   313.6760 2520.7407 0.0084047
## conventional-forest  1246.1111   159.4276 2332.7947 0.0204125
## organic-meadow       1762.5000   947.4873 2577.5127 0.0000209
## conventional-meadow  1591.4028   799.3528 2383.4527 0.0000593
## conventional-organic -171.0972  -963.1472  620.9527 0.9323217
## 
## [1] "observed"
## [1] "10...20"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  1.6778 0.1983
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 9.1193, df = 3, p-value = 0.02775
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.200  -      -      
## organic      0.068  0.459  -      
## conventional 0.068  0.068  0.541  
## 
## P value adjustment method: BH 
##             Df  Sum Sq Mean Sq F value Pr(>F)  
## sample_type  3 4612134 1537378   4.074 0.0179 *
## Residuals   24 9056485  377354                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                           diff       lwr      upr     p adj
## meadow-forest         611.9167 -535.3253 1759.159 0.4695196
## organic-forest       1043.5417 -103.7003 2190.784 0.0838551
## conventional-forest  1288.4444  158.7186 2418.170 0.0212276
## organic-meadow        431.6250 -415.6694 1278.919 0.5083506
## conventional-meadow   676.5278 -146.8943 1499.950 0.1343587
## conventional-organic  244.9028 -578.5193 1068.325 0.8442175
## 
## [1] "observed"
## [1] "20...30"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  2.0857 0.1287
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 4.6562, df = 3, p-value = 0.1988
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.15   -      -      
## organic      0.25   0.96   -      
## conventional 0.42   0.96   0.72   
## 
## P value adjustment method: BH 
##             Df   Sum Sq Mean Sq F value Pr(>F)
## sample_type  3  4742212 1580737   1.755  0.183
## Residuals   24 21619303  900804               
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                           diff        lwr       upr     p adj
## meadow-forest        1334.2083  -438.3317 3106.7484 0.1894002
## organic-forest       1381.5833  -390.9567 3154.1234 0.1662604
## conventional-forest  1032.6667  -712.8101 2778.1435 0.3804286
## organic-meadow         47.3750 -1261.7326 1356.4826 0.9996332
## conventional-meadow  -301.5417 -1573.7656  970.6822 0.9131616
## conventional-organic -348.9167 -1621.1406  923.3072 0.8729624
## 
## [1] "observed"
## [1] "30...40"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value  Pr(>F)  
## group  3  2.6803 0.06959 .
##       24                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 3.7523, df = 3, p-value = 0.2895
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.46   -      -      
## organic      0.46   0.46   -      
## conventional 0.46   0.46   0.67   
## 
## P value adjustment method: BH 
##             Df  Sum Sq Mean Sq F value Pr(>F)
## sample_type  3  986615  328872   1.716   0.19
## Residuals   24 4599845  191660               
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                            diff       lwr       upr     p adj
## meadow-forest         624.41667 -193.1945 1442.0279 0.1795937
## organic-forest        289.91667 -527.6945 1107.5279 0.7630092
## conventional-forest   342.44444 -462.6834 1147.5723 0.6489479
## organic-meadow       -334.50000 -938.3459  269.3459 0.4370977
## conventional-meadow  -281.97222 -868.8049  304.8605 0.5563050
## conventional-organic   52.52778 -534.3049  639.3605 0.9945665
## 
## [1] "observed"
## [1] "40..."
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  1.3401 0.2848
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 10.863, df = 3, p-value = 0.01249
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.153  -      -      
## organic      0.056  0.056  -      
## conventional 0.175  0.963  0.056  
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value Pr(>F)  
## sample_type  3 407159  135720   4.089 0.0177 *
## Residuals   24 796553   33190                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                            diff         lwr       upr     p adj
## meadow-forest         114.75000 -225.487851 454.98785 0.7889685
## organic-forest        337.87500   -2.362851 678.11285 0.0520810
## conventional-forest    77.11111 -257.931964 412.15419 0.9197120
## organic-meadow        223.12500  -28.157306 474.40731 0.0944700
## conventional-meadow   -37.63889 -281.841396 206.56362 0.9736063
## conventional-organic -260.76389 -504.966396 -16.56138 0.0333018
## 
## [1] "AMF_richness"
## [1] "0...10"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  0.4636 0.7103
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 8.1971, df = 3, p-value = 0.04211
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.33   -      -      
## organic      0.13   0.56   -      
## conventional 0.13   0.13   0.15   
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value Pr(>F)  
## sample_type  3  664.5   221.5   3.238 0.0398 *
## Residuals   24 1641.6    68.4                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                           diff        lwr      upr     p adj
## meadow-forest         6.708333 -8.7373996 22.15407 0.6339029
## organic-forest        9.083333 -6.3623996 24.52907 0.3855925
## conventional-forest  15.555556  0.3456492 30.76546 0.0437049
## organic-meadow        2.375000 -9.0324298 13.78243 0.9387885
## conventional-meadow   8.847222 -2.2388068 19.93325 0.1515032
## conventional-organic  6.472222 -4.6138068 17.55825 0.3918515
## 
## [1] "AMF_richness"
## [1] "10...20"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  0.1141  0.951
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 7.4387, df = 3, p-value = 0.05916
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.076  -      -      
## organic      0.170  0.442  -      
## conventional 0.076  0.699  0.433  
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value Pr(>F)  
## sample_type  3    894  298.00   4.384 0.0135 *
## Residuals   24   1631   67.97                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                            diff        lwr      upr     p adj
## meadow-forest        18.5833333   3.186290 33.98038 0.0139049
## organic-forest       12.4583333  -2.938710 27.85538 0.1432253
## conventional-forest  17.7777778   2.615818 32.93974 0.0173327
## organic-meadow       -6.1250000 -17.496470  5.24647 0.4611598
## conventional-meadow  -0.8055556 -11.856638 10.24553 0.9970402
## conventional-organic  5.3194444  -5.731638 16.37053 0.5548799
## 
## [1] "AMF_richness"
## [1] "20...30"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  2.1721 0.1176
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 10.239, df = 3, p-value = 0.01664
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.056  -      -      
## organic      0.056  0.154  -      
## conventional 0.404  0.068  0.402  
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value  Pr(>F)   
## sample_type  3   2273   757.8   4.859 0.00883 **
## Residuals   24   3743   156.0                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                            diff        lwr        upr     p adj
## meadow-forest         28.708333   5.385511 52.0311555 0.0119094
## organic-forest        18.083333  -5.239489 41.4061555 0.1696484
## conventional-forest   11.222222 -11.744505 34.1889499 0.5427300
## organic-meadow       -10.625000 -27.850046  6.6000457 0.3447892
## conventional-meadow  -17.486111 -34.225847 -0.7463756 0.0383380
## conventional-organic  -6.861111 -23.600847  9.8786244 0.6746486
## 
## [1] "AMF_richness"
## [1] "30...40"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  2.2858 0.1044
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 9.515, df = 3, p-value = 0.02317
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.073  -      -      
## organic      0.643  0.228  -      
## conventional 0.926  0.023  0.643  
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value  Pr(>F)   
## sample_type  3   3828  1275.9   5.884 0.00369 **
## Residuals   24   5204   216.8                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                            diff         lwr       upr     p adj
## meadow-forest         26.875000  -0.6265144 54.376514 0.0571204
## organic-forest        10.375000 -17.1265144 37.876514 0.7276472
## conventional-forest   -1.777778 -28.8593971 25.303842 0.9978313
## organic-meadow       -16.500000 -36.8112145  3.811214 0.1408901
## conventional-meadow  -28.652778 -48.3917302 -8.913825 0.0027413
## conventional-organic -12.152778 -31.8917302  7.586175 0.3463849
## 
## [1] "AMF_richness"
## [1] "40..."
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3   1.591 0.2176
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 4.4541, df = 3, p-value = 0.2164
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.36   -      -      
## organic      0.36   0.83   -      
## conventional 0.51   0.36   0.36   
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value Pr(>F)
## sample_type  3  267.9   89.31   1.488  0.243
## Residuals   24 1440.9   60.04               
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                            diff        lwr       upr     p adj
## meadow-forest         6.0416667  -8.429273 20.512607 0.6619918
## organic-forest        7.4166667  -7.054273 21.887607 0.5032636
## conventional-forest   0.7777778 -13.472219 15.027774 0.9987481
## organic-meadow        1.3750000  -9.312497 12.062497 0.9842975
## conventional-meadow  -5.2638889 -15.650269  5.122492 0.5126347
## conventional-organic -6.6388889 -17.025269  3.747492 0.3148737
## 
## [1] "saprotroph_richness"
## [1] "0...10"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3   1.236 0.3185
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 10.335, df = 3, p-value = 0.01592
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.921  -      -      
## organic      0.170  0.046  -      
## conventional 0.283  0.046  0.283  
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value  Pr(>F)   
## sample_type  3 552978  184326   5.106 0.00711 **
## Residuals   24 866367   36099                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                            diff          lwr      upr     p adj
## meadow-forest          22.66667 -332.1680785 377.5014 0.9980002
## organic-forest        354.66667   -0.1680785 709.5014 0.0501395
## conventional-forest   219.11111 -130.3059916 568.5282 0.3308818
## organic-meadow        332.00000   69.9371730 594.0628 0.0094204
## conventional-meadow   196.44444  -58.2348452 451.1237 0.1730235
## conventional-organic -135.55556 -390.2348452 119.1237 0.4713122
## 
## [1] "saprotroph_richness"
## [1] "10...20"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  1.3935 0.2689
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 8.4841, df = 3, p-value = 0.037
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.073  -      -      
## organic      0.170  0.193  -      
## conventional 0.073  0.185  0.888  
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value Pr(>F)  
## sample_type  3 368992  122997    3.91 0.0209 *
## Residuals   24 754986   31458                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                            diff        lwr       upr     p adj
## meadow-forest         405.16667   73.92528 736.40806 0.0125214
## organic-forest        249.79167  -81.44972 581.03306 0.1881316
## conventional-forest   260.11111  -66.07286 586.29508 0.1519732
## organic-meadow       -155.37500 -400.01298  89.26298 0.3201964
## conventional-meadow  -145.05556 -382.80094  92.68983 0.3540753
## conventional-organic   10.31944 -227.42594 248.06483 0.9993682
## 
## [1] "saprotroph_richness"
## [1] "20...30"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  1.4484 0.2535
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 9.2572, df = 3, p-value = 0.02606
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.036  -      -      
## organic      0.267  0.292  -      
## conventional 0.447  0.036  0.541  
## 
## P value adjustment method: BH 
##             Df  Sum Sq Mean Sq F value Pr(>F)  
## sample_type  3  784367  261456   3.683 0.0259 *
## Residuals   24 1703653   70986                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                           diff        lwr        upr     p adj
## meadow-forest         542.7083   45.12498 1040.29168 0.0289462
## organic-forest        358.2083 -139.37502  855.79168 0.2210832
## conventional-forest   230.3333 -259.65288  720.31955 0.5737676
## organic-meadow       -184.5000 -551.98966  182.98966 0.5204628
## conventional-meadow  -312.3750 -669.51075   44.76075 0.1015303
## conventional-organic -127.8750 -485.01075  229.26075 0.7576950
## 
## [1] "saprotroph_richness"
## [1] "30...40"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3   1.706 0.1924
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 6.5626, df = 3, p-value = 0.08723
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.388  -      -      
## organic      0.864  0.249  -      
## conventional 0.864  0.091  0.482  
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value Pr(>F)  
## sample_type  3 192644   64215   3.255 0.0392 *
## Residuals   24 473414   19726                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                             diff        lwr        upr     p adj
## meadow-forest         186.708333  -75.59003 449.006696 0.2294520
## organic-forest          2.083333 -260.21503 264.381696 0.9999961
## conventional-forest     5.111111 -253.18246 263.404684 0.9999398
## organic-meadow       -184.625000 -378.34518   9.095179 0.0656538
## conventional-meadow  -181.597222 -369.85940   6.664952 0.0614447
## conventional-organic    3.027778 -185.23440 191.289952 0.9999677
## 
## [1] "saprotroph_richness"
## [1] "40..."
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  0.8324 0.4892
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 3.6785, df = 3, p-value = 0.2983
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.39   -      -      
## organic      0.39   0.76   -      
## conventional 0.39   0.47   0.81   
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value Pr(>F)
## sample_type  3   9406    3135   0.892  0.459
## Residuals   24  84343    3514               
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                           diff        lwr       upr     p adj
## meadow-forest         52.66667  -58.04632 163.37965 0.5643329
## organic-forest        47.29167  -63.42132 158.00465 0.6458956
## conventional-forest   20.00000  -89.02261 129.02261 0.9568518
## organic-meadow        -5.37500  -87.14196  76.39196 0.9978224
## conventional-meadow  -32.66667 -112.12987  46.79653 0.6726275
## conventional-organic -27.29167 -106.75487  52.17153 0.7798186
## 
## [1] "pathotroph_richness"
## [1] "0...10"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value  Pr(>F)  
## group  3  3.5826 0.02853 *
##       24                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 19.378, df = 3, p-value = 0.0002283
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest  meadow  organic
## meadow       0.93091 -       -      
## organic      0.01818 0.00047 -      
## conventional 0.01818 0.00047 1.00000
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## sample_type  3 404412  134804   25.49 1.23e-07 ***
## Residuals   24 126933    5289                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                            diff        lwr      upr     p adj
## meadow-forest        -11.041667 -146.86103 124.7777 0.9959110
## organic-forest       234.833333   99.01397 370.6527 0.0004079
## conventional-forest  240.666667  106.92100 374.4123 0.0002508
## organic-meadow       245.875000  145.56575 346.1843 0.0000031
## conventional-meadow  251.708333  154.22526 349.1914 0.0000013
## conventional-organic   5.833333  -91.64974 103.3164 0.9983535
## 
## [1] "pathotroph_richness"
## [1] "10...20"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  2.2339 0.1102
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 21.058, df = 3, p-value = 0.0001024
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest  meadow  organic
## meadow       0.02909 -       -      
## organic      0.01818 0.00047 -      
## conventional 0.01818 0.00047 0.13879
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## sample_type  3 298602   99534   29.77 2.95e-08 ***
## Residuals   24  80239    3343                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                           diff       lwr      upr     p adj
## meadow-forest         61.62500 -46.36138 169.6114 0.4114727
## organic-forest       219.25000 111.26362 327.2364 0.0000513
## conventional-forest  274.66667 168.32903 381.0043 0.0000013
## organic-meadow       157.62500  77.87177 237.3782 0.0000742
## conventional-meadow  213.04167 135.53546 290.5479 0.0000005
## conventional-organic  55.41667 -22.08954 132.9229 0.2261312
## 
## [1] "pathotroph_richness"
## [1] "20...30"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  2.2613 0.1071
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 8.05, df = 3, p-value = 0.04499
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.073  -      -      
## organic      0.073  0.157  -      
## conventional 0.104  0.386  0.888  
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value Pr(>F)  
## sample_type  3  93092   31031   2.547 0.0797 .
## Residuals   24 292356   12182                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                           diff        lwr      upr     p adj
## meadow-forest         75.58333 -130.54194 281.7086 0.7443125
## organic-forest       171.70833  -34.41694 377.8336 0.1266336
## conventional-forest  157.66667  -45.31147 360.6448 0.1684675
## organic-meadow        96.12500  -56.10861 248.3586 0.3250720
## conventional-meadow   82.08333  -65.86114 230.0278 0.4357382
## conventional-organic -14.04167 -161.98614 133.9028 0.9935429
## 
## [1] "pathotroph_richness"
## [1] "30...40"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  1.1511 0.3488
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 5.8552, df = 3, p-value = 0.1189
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.18   -      -      
## organic      0.49   0.75   -      
## conventional 0.18   0.18   0.47   
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value Pr(>F)
## sample_type  3   1589   529.8    1.81  0.172
## Residuals   24   7024   292.7               
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                           diff        lwr      upr     p adj
## meadow-forest        11.208333 -20.742112 43.15878 0.7687688
## organic-forest       15.958333 -15.992112 47.90878 0.5247314
## conventional-forest  24.444444  -7.018179 55.90707 0.1683269
## organic-meadow        4.750000 -18.846968 28.34697 0.9441916
## conventional-meadow  13.236111  -9.696019 36.16824 0.4016916
## conventional-organic  8.486111 -14.446019 31.41824 0.7390336
## 
## [1] "pathotroph_richness"
## [1] "40..."
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3    1.33 0.2879
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 7.1979, df = 3, p-value = 0.06585
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.083  -      -      
## organic      0.083  0.665  -      
## conventional 0.083  0.727  0.665  
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value Pr(>F)
## sample_type  3  114.1   38.03   1.368  0.276
## Residuals   24  666.9   27.79               
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                           diff        lwr       upr     p adj
## meadow-forest         4.166667  -5.678021 14.011355 0.6524360
## organic-forest        6.166667  -3.678021 16.011355 0.3317956
## conventional-forest   2.111111  -7.583267 11.805490 0.9308262
## organic-meadow        2.000000  -5.270784  9.270784 0.8719962
## conventional-meadow  -2.055556  -9.121487  5.010376 0.8525672
## conventional-organic -4.055556 -11.121487  3.010376 0.4065242
# Combine list into a single dataframe
combined_df <- bind_rows(lapply(names(means_and_ses), function(name) {
  df <- means_and_ses[[name]]
  df$result_name <- name
  return(df)
}), .id = "id")

# View the combined dataframe
print(combined_df)
## # A tibble: 80 × 5
##    id    sample_type   mean    se result_name                     
##    <chr> <fct>        <dbl> <dbl> <chr>                           
##  1 1     forest       1478. 438.  depth_0...10_diversity_observed 
##  2 1     meadow       1132. 158.  depth_0...10_diversity_observed 
##  3 1     organic      2895. 242.  depth_0...10_diversity_observed 
##  4 1     conventional 2724. 188.  depth_0...10_diversity_observed 
##  5 2     forest       1608.  93.0 depth_10...20_diversity_observed
##  6 2     meadow       2220. 219.  depth_10...20_diversity_observed
##  7 2     organic      2652. 265.  depth_10...20_diversity_observed
##  8 2     conventional 2897. 181.  depth_10...20_diversity_observed
##  9 3     forest        792. 249.  depth_20...30_diversity_observed
## 10 3     meadow       2126. 226.  depth_20...30_diversity_observed
## # ℹ 70 more rows
# save the mean and se values

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

write.csv2(combined_df, file = "Richness_mean_and_ses.csv")

RESULTS STEP 11: Test between management the 5 most abundant taxa

library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')


load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 35 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
ps_RA <- transform(ps, "compositional")

meta <- meta(ps)

1. Five most abundant phyla

ps_RA_x <- aggregate_rare(ps_RA, level = "phylum", detection = 0, prevalence = 0)
ps_RA_x              
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 14 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 35 sample variables ]
## tax_table()   Taxonomy Table:    [ 14 taxa by 1 taxonomic ranks ]
# 14 taxa and 140 samples


#create data table
x_df <-  psmelt(ps_RA_x)

x_df$year <- "2019"

x <- x_df %>%
  group_by(year, OTU) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))

y <- x %>%
    arrange(desc(mean)) %>%
    slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups:   year [1]
##   year  OTU                  mean      se
##   <chr> <chr>               <dbl>   <dbl>
## 1 2019  Ascomycota        0.661   0.0163 
## 2 2019  Basidiomycota     0.235   0.0159 
## 3 2019  Mortierellomycota 0.0759  0.00873
## 4 2019  Glomeromycota     0.0240  0.00613
## 5 2019  Rozellomycota     0.00228 0.00109

1.2. Test for each 5 phyla

library(car)

taxa <- y$OTU

for (i in taxa) {
 df <- filter(x_df, OTU == i)
 print(i)
 # Using leveneTest()
result = leveneTest(Abundance ~ sample_type, df)
# print the result
print(result)

# perform the Kruskal test
k <- kruskal.test(Abundance ~ sample_type, data = df)
print(k)

w <- pairwise.wilcox.test(df$Abundance, df$sample_type,
                     p.adjust.method = "BH")
print(w)
}
## [1] "Ascomycota"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  1.5105 0.2146
##       136               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 27.535, df = 3, p-value = 4.546e-06
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest  meadow organic
## meadow       5.9e-05 -      -      
## organic      2.2e-07 0.42   -      
## conventional 1.4e-07 0.35   0.70   
## 
## P value adjustment method: BH 
## [1] "Basidiomycota"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  1.2681 0.2879
##       136               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 26.818, df = 3, p-value = 6.428e-06
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest  meadow organic
## meadow       1.1e-05 -      -      
## organic      1.1e-05 0.52   -      
## conventional 2.2e-07 0.48   0.21   
## 
## P value adjustment method: BH 
## [1] "Mortierellomycota"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  1.6242 0.1867
##       136               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 7.126, df = 3, p-value = 0.06799
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest meadow organic
## meadow       0.902  -      -      
## organic      0.902  0.902  -      
## conventional 0.180  0.180  0.076  
## 
## P value adjustment method: BH 
## [1] "Glomeromycota"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  1.0182 0.3867
##       136               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 5.8342, df = 3, p-value = 0.12
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest meadow organic
## meadow       0.14   -      -      
## organic      0.27   0.36   -      
## conventional 0.30   0.27   0.91   
## 
## P value adjustment method: BH 
## [1] "Rozellomycota"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value   Pr(>F)   
## group   3  4.5426 0.004562 **
##       136                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 11.375, df = 3, p-value = 0.00986
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest meadow organic
## meadow       0.120  -      -      
## organic      0.058  0.120  -      
## conventional 0.436  0.120  0.041  
## 
## P value adjustment method: BH
x <- x_df %>%
  filter(OTU %in% taxa) %>% group_by(OTU, sample_type) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 20 × 4
## # Groups:   OTU [5]
##    OTU               sample_type      mean        se
##    <chr>             <fct>           <dbl>     <dbl>
##  1 Ascomycota        forest       0.402    0.0486   
##  2 Ascomycota        meadow       0.670    0.0293   
##  3 Ascomycota        organic      0.692    0.0264   
##  4 Ascomycota        conventional 0.711    0.0231   
##  5 Basidiomycota     forest       0.533    0.0555   
##  6 Basidiomycota     meadow       0.210    0.0261   
##  7 Basidiomycota     organic      0.220    0.0247   
##  8 Basidiomycota     conventional 0.170    0.0191   
##  9 Glomeromycota     forest       0.00368  0.00177  
## 10 Glomeromycota     meadow       0.0325   0.0121   
## 11 Glomeromycota     organic      0.0333   0.0158   
## 12 Glomeromycota     conventional 0.0149   0.00690  
## 13 Mortierellomycota forest       0.0470   0.0158   
## 14 Mortierellomycota meadow       0.0860   0.0195   
## 15 Mortierellomycota organic      0.0522   0.0120   
## 16 Mortierellomycota conventional 0.0975   0.0167   
## 17 Rozellomycota     forest       0.0131   0.00974  
## 18 Rozellomycota     meadow       0.000315 0.0000708
## 19 Rozellomycota     organic      0.000163 0.0000405
## 20 Rozellomycota     conventional 0.00228  0.000714

1.3. Test phyla in soil layers

1.3.1 meadow, organic and conevntional only

1.3.1.1. Five most abundant phyla

x_df_nf <- subset(x_df, sample_type!="forest")

x_df_nf$depth <- as.factor(x_df_nf$depth)
x_df_nf$year <- "2019"

x <- x_df_nf %>%
  group_by(year, OTU) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))

y <- x %>%
    arrange(desc(mean)) %>%
    slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups:   year [1]
##   year  OTU                  mean       se
##   <chr> <chr>               <dbl>    <dbl>
## 1 2019  Ascomycota        0.692   0.0151  
## 2 2019  Basidiomycota     0.199   0.0134  
## 3 2019  Mortierellomycota 0.0793  0.00956 
## 4 2019  Glomeromycota     0.0264  0.00683 
## 5 2019  Chytridiomycota   0.00209 0.000362
taxa <- y$OTU

x <- x_df_nf %>%
  filter(OTU %in% taxa) %>% group_by(OTU, depth) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 25 × 4
## # Groups:   OTU [5]
##    OTU           depth    mean     se
##    <chr>         <fct>   <dbl>  <dbl>
##  1 Ascomycota    0...10  0.702 0.0227
##  2 Ascomycota    10...20 0.682 0.0190
##  3 Ascomycota    20...30 0.696 0.0207
##  4 Ascomycota    30...40 0.627 0.0486
##  5 Ascomycota    40...   0.752 0.0431
##  6 Basidiomycota 0...10  0.265 0.0198
##  7 Basidiomycota 10...20 0.283 0.0188
##  8 Basidiomycota 20...30 0.196 0.0248
##  9 Basidiomycota 30...40 0.114 0.0320
## 10 Basidiomycota 40...   0.136 0.0364
## # ℹ 15 more rows
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

write.csv2(x, file = "5_Phyla_in_soil_layers_WITHOUT_forest_mean.csv")
library(car)
library("rcompanion")
library("multcompView")

taxa <- y$OTU

for (i in taxa) {
  df <- subset(x_df_nf, OTU==i)
  # Using leveneTest()
  print(i)
result = leveneTest(Abundance ~ depth, df)
# print the result
print(result)

# perform the Kruskal test
k <- kruskal.test(Abundance ~ depth, data = df)
print(k)

wilcox.res <- pairwise.wilcox.test(df$Abundance, df$depth,
                     p.adjust.method = "BH")
wilcox.res <- wilcox.res[["p.value"]]
print(wilcox.res)
mc = fullPTable(wilcox.res)
mc <- multcompLetters(mc)
mc <- mc[["Letters"]]
print(mc)
}
## [1] "Ascomycota"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value   Pr(>F)    
## group   4  7.2906 2.73e-05 ***
##       120                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 4.2424, df = 4, p-value = 0.3742
## 
##            0...10   10...20   20...30   30...40
## 10...20 0.6802454        NA        NA        NA
## 20...30 0.8626083 0.6802454        NA        NA
## 30...40 0.6802454 0.8626083 0.7375178        NA
## 40...   0.6574517 0.6574517 0.6574517 0.6212054
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "a"     "a"     "a"     "a" 
## [1] "Basidiomycota"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   4  1.0875 0.3659
##       120               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 38.942, df = 4, p-value = 7.161e-08
## 
##               0...10      10...20     20...30   30...40
## 10...20 6.119242e-01           NA          NA        NA
## 20...30 3.091939e-02 4.118506e-03          NA        NA
## 30...40 2.260766e-06 2.260766e-06 0.002588052        NA
## 40...   1.622194e-03 5.646255e-04 0.025631670 0.6721398
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "a"     "b"     "c"     "c" 
## [1] "Mortierellomycota"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   4   5.562 0.0003841 ***
##       120                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 39.225, df = 4, p-value = 6.259e-08
## 
##               0...10      10...20    20...30      30...40
## 10...20 8.866322e-02           NA         NA           NA
## 20...30 3.972787e-05 3.234148e-04         NA           NA
## 30...40 1.302516e-06 1.302516e-06 0.02136213           NA
## 40...   8.777488e-01 7.468220e-01 0.04799830 0.0008671149
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "a"     "b"     "c"     "a" 
## [1] "Glomeromycota"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   4  4.9745 0.0009581 ***
##       120                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 23.482, df = 4, p-value = 0.0001014
## 
##               0...10     10...20    20...30   30...40
## 10...20 9.666451e-03          NA         NA        NA
## 20...30 3.425034e-05 0.009666451         NA        NA
## 30...40 8.343651e-04 0.003236620 0.04959274        NA
## 40...   5.593752e-01 0.510709727 0.33419998 0.1147993
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "b"     "c"     "d"  "abcd" 
## [1] "Chytridiomycota"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value   Pr(>F)   
## group   4  4.0054 0.004377 **
##       120                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 84.286, df = 4, p-value < 2.2e-16
## 
##               0...10      10...20      20...30   30...40
## 10...20 5.507318e-01           NA           NA        NA
## 20...30 3.796944e-01 2.030024e-01           NA        NA
## 30...40 1.071611e-08 1.071611e-08 1.991630e-07        NA
## 40...   6.881199e-10 6.881199e-10 7.192628e-09 0.1413528
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "a"     "a"     "b"     "b"

1.3.1.2. AMF below 30 cm

taxa <- "Glomeromycota"

# Construct a data.frame with the selected taxonomic group
df <- filter(x_df_nf, OTU == taxa & depth_numerical > 40)

#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ sample_type, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2  1.1016   0.35
##       22
x <- df %>%
  group_by(sample_type) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 3 × 3
##   sample_type    mean     se
##   <fct>         <dbl>  <dbl>
## 1 meadow       0.0779 0.0511
## 2 organic      0.0404 0.0179
## 3 conventional 0.0165 0.0139
# one-way ANOVA
res.aov <- aov(Abundance ~ sample_type, data = df)
# Summary of the analysis
print(summary(res.aov))
##             Df Sum Sq  Mean Sq F value Pr(>F)
## sample_type  2 0.0161 0.008049   0.992  0.387
## Residuals   22 0.1785 0.008113

1.3.2 Forest only

1.3.2.1. Five most abundant phyla

x_df_nf <- subset(x_df, sample_type=="forest")

x_df_nf$depth <- as.factor(x_df_nf$depth)
x_df_nf$year <- "2019"

x <- x_df_nf %>%
  group_by(year, OTU) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))

y <- x %>%
    arrange(desc(mean)) %>%
    slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups:   year [1]
##   year  OTU                  mean      se
##   <chr> <chr>               <dbl>   <dbl>
## 1 2019  Basidiomycota     0.533   0.0555 
## 2 2019  Ascomycota        0.402   0.0486 
## 3 2019  Mortierellomycota 0.0470  0.0158 
## 4 2019  Rozellomycota     0.0131  0.00974
## 5 2019  Glomeromycota     0.00368 0.00177
taxa <- y$OTU

x <- x_df_nf %>%
  filter(OTU %in% taxa) %>% group_by(OTU, depth) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 25 × 4
## # Groups:   OTU [5]
##    OTU           depth    mean     se
##    <chr>         <fct>   <dbl>  <dbl>
##  1 Ascomycota    0...10  0.478 0.0839
##  2 Ascomycota    10...20 0.516 0.0422
##  3 Ascomycota    20...30 0.274 0.160 
##  4 Ascomycota    30...40 0.407 0.129 
##  5 Ascomycota    40...   0.336 0.109 
##  6 Basidiomycota 0...10  0.449 0.121 
##  7 Basidiomycota 10...20 0.457 0.0382
##  8 Basidiomycota 20...30 0.597 0.213 
##  9 Basidiomycota 30...40 0.510 0.149 
## 10 Basidiomycota 40...   0.649 0.0941
## # ℹ 15 more rows
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

write.csv2(x, file = "5_Phyla_in_soil_layers_ONLY_forest_mean.csv")
taxa <- "Glomeromycota"

# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)

#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ depth, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value  Pr(>F)  
## group  4  3.1178 0.06588 .
##       10                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# perform the Kruskal test
kruskal.test(Abundance ~ depth, data = df)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 7.3861, df = 4, p-value = 0.1168

2. Class

ps_RA_x <- aggregate_rare(ps_RA, level = "class", detection = 0, prevalence = 0)
ps_RA_x              
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 68 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 35 sample variables ]
## tax_table()   Taxonomy Table:    [ 68 taxa by 1 taxonomic ranks ]
# 68 taxa and 140 samples


#create data table
x_df <-  psmelt(ps_RA_x)

2.1. Five most abundant class

x_df$year <- "2019"

x <- x_df %>%
  group_by(year, OTU) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))

y <- x %>%
    arrange(desc(mean)) %>%
    slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups:   year [1]
##   year  OTU               mean     se
##   <chr> <chr>            <dbl>  <dbl>
## 1 2019  Leotiomycetes   0.282  0.0200
## 2 2019  Sordariomycetes 0.169  0.0124
## 3 2019  Dothideomycetes 0.127  0.0128
## 4 2019  Tremellomycetes 0.126  0.0105
## 5 2019  Agaricomycetes  0.0884 0.0145
taxa <- y$OTU

x <- x_df %>%
  filter(OTU %in% taxa) %>% group_by(OTU, sample_type) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 20 × 4
## # Groups:   OTU [5]
##    OTU             sample_type    mean      se
##    <chr>           <fct>         <dbl>   <dbl>
##  1 Agaricomycetes  forest       0.448  0.0653 
##  2 Agaricomycetes  meadow       0.0583 0.00947
##  3 Agaricomycetes  organic      0.0592 0.0217 
##  4 Agaricomycetes  conventional 0.0215 0.00727
##  5 Dothideomycetes forest       0.0501 0.0130 
##  6 Dothideomycetes meadow       0.189  0.0351 
##  7 Dothideomycetes organic      0.109  0.0178 
##  8 Dothideomycetes conventional 0.115  0.0151 
##  9 Leotiomycetes   forest       0.202  0.0354 
## 10 Leotiomycetes   meadow       0.301  0.0367 
## 11 Leotiomycetes   organic      0.270  0.0398 
## 12 Leotiomycetes   conventional 0.302  0.0375 
## 13 Sordariomycetes forest       0.0309 0.00929
## 14 Sordariomycetes meadow       0.0874 0.0157 
## 15 Sordariomycetes organic      0.239  0.0229 
## 16 Sordariomycetes conventional 0.225  0.0210 
## 17 Tremellomycetes forest       0.0757 0.0285 
## 18 Tremellomycetes meadow       0.139  0.0215 
## 19 Tremellomycetes organic      0.148  0.0209 
## 20 Tremellomycetes conventional 0.110  0.0157
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

write.csv2(x, file = "5_Class_in_soil_layers_All_Management.csv")

2.2. Test for each class separately

taxa <- y$OTU

for (i in taxa) {
  # Construct a data.frame with the selected taxonomic group
df <- filter(x_df, OTU == i)
print(i)

# Using leveneTest()
result = leveneTest(Abundance ~ sample_type, df)
# print the result
print(result)


# perform the Kruskal test
k <- kruskal.test(Abundance ~ sample_type, data = df)
print(k)

w <- pairwise.wilcox.test(df$Abundance, df$sample_type,
                     p.adjust.method = "BH")
print(w)
}
## [1] "Leotiomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  0.6472  0.586
##       136               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 3.8991, df = 3, p-value = 0.2726
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest meadow organic
## meadow       0.33   -      -      
## organic      0.61   0.33   -      
## conventional 0.33   0.61   0.33   
## 
## P value adjustment method: BH 
## [1] "Sordariomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   3  8.0545 5.591e-05 ***
##       136                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 47.221, df = 3, p-value = 3.12e-10
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest  meadow  organic
## meadow       0.0057  -       -      
## organic      4.9e-07 1.9e-06 -      
## conventional 4.9e-07 1.9e-06 0.6393 
## 
## P value adjustment method: BH 
## [1] "Dothideomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   3  5.7442 0.0009917 ***
##       136                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 9.5392, df = 3, p-value = 0.02292
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest meadow organic
## meadow       0.024  -      -      
## organic      0.134  0.134  -      
## conventional 0.024  0.415  0.560  
## 
## P value adjustment method: BH 
## [1] "Tremellomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  1.4793  0.223
##       136               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 5.8864, df = 3, p-value = 0.1173
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest meadow organic
## meadow       0.20   -      -      
## organic      0.20   0.71   -      
## conventional 0.33   0.33   0.23   
## 
## P value adjustment method: BH 
## [1] "Agaricomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   3   13.41 1.018e-07 ***
##       136                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 44.002, df = 3, p-value = 1.508e-09
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest  meadow  organic
## meadow       4.7e-08 -       -      
## organic      4.7e-08 0.05845 -      
## conventional 4.7e-08 0.00015 0.52326
## 
## P value adjustment method: BH
x <- x_df %>%
  filter(OTU %in% taxa) %>% group_by(OTU, sample_type) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 20 × 4
## # Groups:   OTU [5]
##    OTU             sample_type    mean      se
##    <chr>           <fct>         <dbl>   <dbl>
##  1 Agaricomycetes  forest       0.448  0.0653 
##  2 Agaricomycetes  meadow       0.0583 0.00947
##  3 Agaricomycetes  organic      0.0592 0.0217 
##  4 Agaricomycetes  conventional 0.0215 0.00727
##  5 Dothideomycetes forest       0.0501 0.0130 
##  6 Dothideomycetes meadow       0.189  0.0351 
##  7 Dothideomycetes organic      0.109  0.0178 
##  8 Dothideomycetes conventional 0.115  0.0151 
##  9 Leotiomycetes   forest       0.202  0.0354 
## 10 Leotiomycetes   meadow       0.301  0.0367 
## 11 Leotiomycetes   organic      0.270  0.0398 
## 12 Leotiomycetes   conventional 0.302  0.0375 
## 13 Sordariomycetes forest       0.0309 0.00929
## 14 Sordariomycetes meadow       0.0874 0.0157 
## 15 Sordariomycetes organic      0.239  0.0229 
## 16 Sordariomycetes conventional 0.225  0.0210 
## 17 Tremellomycetes forest       0.0757 0.0285 
## 18 Tremellomycetes meadow       0.139  0.0215 
## 19 Tremellomycetes organic      0.148  0.0209 
## 20 Tremellomycetes conventional 0.110  0.0157

2.3. Test classes in soil layers

2.3.1 meadow, organic and conevntional only

2.3.1.1. Five most abundant classes

x_df_nf <- subset(x_df, sample_type!="forest")

x_df_nf$depth <- as.factor(x_df_nf$depth)
x_df_nf$year <- "2019"

x <- x_df_nf %>%
  group_by(year, OTU) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))

y <- x %>%
    arrange(desc(mean)) %>%
    slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups:   year [1]
##   year  OTU                  mean      se
##   <chr> <chr>               <dbl>   <dbl>
## 1 2019  Leotiomycetes      0.291  0.0218 
## 2 2019  Sordariomycetes    0.185  0.0131 
## 3 2019  Dothideomycetes    0.137  0.0140 
## 4 2019  Tremellomycetes    0.132  0.0111 
## 5 2019  Mortierellomycetes 0.0793 0.00956
taxa <- y$OTU

x <- x_df_nf %>%
  filter(OTU %in% taxa) %>% group_by(OTU, depth) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 25 × 4
## # Groups:   OTU [5]
##    OTU             depth     mean     se
##    <chr>           <fct>    <dbl>  <dbl>
##  1 Dothideomycetes 0...10  0.296  0.0438
##  2 Dothideomycetes 10...20 0.193  0.0160
##  3 Dothideomycetes 20...30 0.0870 0.0164
##  4 Dothideomycetes 30...40 0.0515 0.0207
##  5 Dothideomycetes 40...   0.0548 0.0172
##  6 Leotiomycetes   0...10  0.115  0.0183
##  7 Leotiomycetes   10...20 0.150  0.0126
##  8 Leotiomycetes   20...30 0.310  0.0394
##  9 Leotiomycetes   30...40 0.422  0.0482
## 10 Leotiomycetes   40...   0.460  0.0626
## # ℹ 15 more rows
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

write.csv2(x, file = "5_Classes_in_soil_layers_WITHOUT_forest_mean.csv")
taxa <- y$OTU

for (i in taxa) {
  # Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==i)
print(i)

# Using leveneTest()
result = leveneTest(Abundance ~ depth, df)
# print the result
print(result)

# perform the Kruskal test
k <- kruskal.test(Abundance ~ depth, data = df)
print(k)

wilcox.res <- pairwise.wilcox.test(df$Abundance, df$depth,
                     p.adjust.method = "BH")
wilcox.res <- wilcox.res[["p.value"]]
print(wilcox.res)

mc = fullPTable(wilcox.res)
mc <- multcompLetters(mc)
mc <- mc[["Letters"]]
print(mc)
}
## [1] "Leotiomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   4  14.875 6.712e-10 ***
##       120                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 43.951, df = 4, p-value = 6.568e-09
## 
##               0...10      10...20    20...30   30...40
## 10...20 2.170595e-02           NA         NA        NA
## 20...30 1.819319e-05 3.105480e-03         NA        NA
## 30...40 1.819319e-05 2.393976e-05 0.08486076        NA
## 40...   1.819319e-05 4.506015e-05 0.11043685 0.8626083
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "b"     "c"     "c"     "c" 
## [1] "Sordariomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value  Pr(>F)  
## group   4  2.6173 0.03845 *
##       120                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 25.086, df = 4, p-value = 4.835e-05
## 
##               0...10      10...20      20...30   30...40
## 10...20 3.753357e-01           NA           NA        NA
## 20...30 3.753357e-01 2.425450e-01           NA        NA
## 30...40 8.223114e-05 8.223114e-05 0.0002982584        NA
## 40...   2.183687e-01 1.084402e-01 0.3753357254 0.1072279
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "a"     "a"     "b"    "ab" 
## [1] "Dothideomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   4  5.7029 0.0003088 ***
##       120                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 64.123, df = 4, p-value = 3.937e-13
## 
##               0...10      10...20      20...30   30...40
## 10...20 3.510510e-01           NA           NA        NA
## 20...30 9.043065e-07 2.433622e-06           NA        NA
## 30...40 4.846508e-08 5.869563e-08 0.0005249541        NA
## 40...   1.060943e-07 3.675922e-07 0.0151251757 0.5900142
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "a"     "b"     "c"     "c" 
## [1] "Tremellomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value   Pr(>F)   
## group   4  4.5628 0.001825 **
##       120                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 72.482, df = 4, p-value = 6.787e-15
## 
##               0...10      10...20      20...30    30...40
## 10...20 3.448406e-01           NA           NA         NA
## 20...30 3.257171e-02 1.715879e-03           NA         NA
## 30...40 1.503038e-12 3.164291e-13 6.082530e-06         NA
## 40...   1.211627e-08 4.250065e-09 2.741038e-05 0.04327362
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "a"     "b"     "c"     "d" 
## [1] "Mortierellomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   4   5.562 0.0003841 ***
##       120                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 39.225, df = 4, p-value = 6.259e-08
## 
##               0...10      10...20    20...30      30...40
## 10...20 8.866322e-02           NA         NA           NA
## 20...30 3.972787e-05 3.234148e-04         NA           NA
## 30...40 1.302516e-06 1.302516e-06 0.02136213           NA
## 40...   8.777488e-01 7.468220e-01 0.04799830 0.0008671149
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "a"     "b"     "c"     "a"

2.3.2 Forest only

2.3.2.1. Five most abundant classes

x_df_nf <- subset(x_df, sample_type=="forest")

x_df_nf$depth <- as.factor(x_df_nf$depth)
x_df_nf$year <- "2019"

x <- x_df_nf %>%
  group_by(year, OTU) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))

y <- x %>%
    arrange(desc(mean)) %>%
    slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups:   year [1]
##   year  OTU               mean     se
##   <chr> <chr>            <dbl>  <dbl>
## 1 2019  Agaricomycetes  0.448  0.0653
## 2 2019  Leotiomycetes   0.202  0.0354
## 3 2019  Tremellomycetes 0.0757 0.0285
## 4 2019  Pezizomycetes   0.0520 0.0233
## 5 2019  Dothideomycetes 0.0501 0.0130
taxa <- y$OTU

x <- x_df_nf %>%
  filter(OTU %in% taxa) %>% group_by(OTU, depth) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 25 × 4
## # Groups:   OTU [5]
##    OTU             depth      mean      se
##    <chr>           <fct>     <dbl>   <dbl>
##  1 Agaricomycetes  0...10  0.217   0.0933 
##  2 Agaricomycetes  10...20 0.395   0.0287 
##  3 Agaricomycetes  20...30 0.539   0.260  
##  4 Agaricomycetes  30...40 0.507   0.151  
##  5 Agaricomycetes  40...   0.578   0.0838 
##  6 Dothideomycetes 0...10  0.0996  0.0324 
##  7 Dothideomycetes 10...20 0.0913  0.00381
##  8 Dothideomycetes 20...30 0.0418  0.0285 
##  9 Dothideomycetes 30...40 0.0121  0.0120 
## 10 Dothideomycetes 40...   0.00578 0.00573
## # ℹ 15 more rows
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

write.csv2(x, file = "5_Classes_in_soil_layers_ONLY_forest_mean.csv")
taxa <- y$OTU

for (i in taxa) {
  # Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==i)
print(i)

# Using leveneTest()
result = leveneTest(Abundance ~ depth, df)
# print the result
print(result)

# perform the Kruskal test
k <- kruskal.test(Abundance ~ depth, data = df)
print(k)

wilcox.res <- pairwise.wilcox.test(df$Abundance, df$depth,
                     p.adjust.method = "BH")
wilcox.res <- wilcox.res[["p.value"]]
print(wilcox.res)

mc = fullPTable(wilcox.res)
mc <- multcompLetters(mc)
mc <- mc[["Letters"]]
print(mc)
}
## [1] "Agaricomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  4  0.8107 0.5461
##       10               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 4.7, df = 4, p-value = 0.3195
## 
##            0...10 10...20 20...30 30...40
## 10...20 1.0000000      NA      NA      NA
## 20...30 1.0000000     1.0      NA      NA
## 30...40 0.6666667     1.0       1      NA
## 40...   0.5000000     0.5       1       1
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "a"     "a"     "a"     "a" 
## [1] "Leotiomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  4  0.2981 0.8726
##       10               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 2.0667, df = 4, p-value = 0.7235
## 
##            0...10   10...20   20...30   30...40
## 10...20 0.7777778        NA        NA        NA
## 20...30 0.7777778 0.7777778        NA        NA
## 30...40 0.7777778 0.7777778 0.7777778        NA
## 40...   0.7777778 0.7777778 1.0000000 0.7777778
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "a"     "a"     "a"     "a" 
## [1] "Tremellomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  4  1.3902 0.3055
##       10               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 7.1667, df = 4, p-value = 0.1273
## 
##            0...10   10...20   20...30 30...40
## 10...20 0.4000000        NA        NA      NA
## 20...30 0.4000000 0.7777778        NA      NA
## 30...40 0.4000000 0.4000000 0.5714286      NA
## 40...   0.5714286 0.7777778 1.0000000     0.4
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "a"     "a"     "a"     "a" 
## [1] "Pezizomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  4   1.392 0.3049
##       10               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 8.1, df = 4, p-value = 0.08798
## 
##            0...10   10...20 20...30   30...40
## 10...20 0.4000000        NA      NA        NA
## 20...30 0.2500000 0.5714286      NA        NA
## 30...40 0.2500000 0.2500000    0.25        NA
## 40...   0.7777778 0.7777778    1.00 0.5714286
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "a"     "a"     "a"     "a" 
## [1] "Dothideomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  4  0.5163 0.7259
##       10               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 8.5667, df = 4, p-value = 0.07289
## 
##            0...10   10...20 20...30 30...40
## 10...20 0.7777778        NA      NA      NA
## 20...30 0.4000000 0.5000000      NA      NA
## 30...40 0.4000000 0.3333333     0.5      NA
## 40...   0.3333333 0.3333333     0.5       1
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "a"     "a"     "a"     "a"

2.3.3 Test Classes that popped up in certain layers

ps_RA_x <- aggregate_rare(ps_RA, level = "class", detection = 0, prevalence = 0)
ps_RA_x              
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 68 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 35 sample variables ]
## tax_table()   Taxonomy Table:    [ 68 taxa by 1 taxonomic ranks ]
# 68 taxa and 140 samples


#create data table
x_df <-  psmelt(ps_RA_x)

This senetence based on looking at the class composition barplot:

“Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Archaeosporomycetes in organic 30-80 cm, Geoglossomycetes in forest 10-20 cm, meadow 10-40 cm and organic 20-80 cm, Microbotryomycetes in conventional 40-80 cm, and Glomeromycetes in meadow 30-80 cm, and Orbiliomycetes.”

Let’s test them separately

2.3.3.1. Pezizomycetes

x_df_nf <- subset(x_df, sample_type=="forest")

x_df_nf$compare <- NA

x_df_nf$compare[x_df_nf$depth=="30...40"]<-"yes"
x_df_nf$compare[is.na(x_df_nf$compare)]<-"no"
taxa <- "Pezizomycetes"

# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)

#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ compare, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  1  2.6256 0.1291
##       13
# first one-way ANOVA
res.aov <- aov(Abundance ~ compare, data = df)
# Summary of the analysis
print(summary(res.aov))
##             Df  Sum Sq Mean Sq F value  Pr(>F)   
## compare      1 0.05219 0.05219   10.96 0.00564 **
## Residuals   13 0.06193 0.00476                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

This sentence remains to be tested:

“Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Archaeosporomycetes in organic 30-80 cm, Geoglossomycetes in forest 10-20 cm, meadow 10-40 cm and organic 20-80 cm, Microbotryomycetes in conventional 40-80 cm, and Glomeromycetes in meadow 30-80 cm, and Orbiliomycetes.”

2.3.3.2. Archaeosporomycetes

x_df_nf <- subset(x_df, sample_type=="organic")

x_df_nf$compare <- NA

x_df_nf$compare[x_df_nf$depth=="30...40"]<-"yes"
x_df_nf$compare[is.na(x_df_nf$compare)]<-"no"
taxa <- "Archaeosporomycetes"

# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)

#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ compare, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value  Pr(>F)  
## group  1  5.1753 0.02864 *
##       38                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# perform the Kruskal test
k <- kruskal.test(Abundance ~ compare, data = df)
print(k)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by compare
## Kruskal-Wallis chi-squared = 0, df = 1, p-value = 1

Not significant!

This sentence remains to be tested:

“Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Geoglossomycetes in meadow 10-40 cm, Microbotryomycetes in conventional 40-80 cm, and Glomeromycetes in meadow 30-80 cm, and Orbiliomycetes.”

2.3.3.3. Geoglossomycetes

not sig. in:

  • forest 10-20 cm
  • organic 20-80 cm
x_df_nf <- subset(x_df, sample_type=="meadow")

x_df_nf$compare <- NA

x_df_nf$compare[x_df_nf$depth=="20...30"]<-"yes"
x_df_nf$compare[x_df_nf$depth=="30...40"]<-"yes"
x_df_nf$compare[x_df_nf$depth=="10...20"]<-"yes"
x_df_nf$compare[is.na(x_df_nf$compare)]<-"no"
taxa <- "Geoglossomycetes"

# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)

#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ compare, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value    Pr(>F)    
## group  1  21.441 4.181e-05 ***
##       38                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# perform the Kruskal test
k <- kruskal.test(Abundance ~ compare, data = df)
print(k)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by compare
## Kruskal-Wallis chi-squared = 14.099, df = 1, p-value = 0.0001734

Is sig. in meadow 10-40 cm

“Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Geoglossomycetes meadow 10-40 cm (Kruskal; p = 0.000), Microbotryomycetes in conventional 40-80 cm (Kruskal, P = 0.030), and Glomeromycetes in meadow 30-80 cm, and Orbiliomycetes.”

2.3.3.4. Microbotryomycetes

x_df_nf <- subset(x_df, sample_type=="conventional")

x_df_nf$compare <- NA

x_df_nf$compare[x_df_nf$depth=="40..."]<-"yes"
x_df_nf$compare[is.na(x_df_nf$compare)]<-"no"
taxa <- "Microbotryomycetes"

# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)

#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ compare, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value   Pr(>F)   
## group  1  7.2692 0.009974 **
##       43                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# perform the Kruskal test
k <- kruskal.test(Abundance ~ compare, data = df)
print(k)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by compare
## Kruskal-Wallis chi-squared = 4.6876, df = 1, p-value = 0.03038

Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Geoglossomycetes meadow 10-40 cm (Kruskal; p = 0.000), Microbotryomycetes in conventional 40-80 cm (Kruskal, P = 0.030), and Glomeromycetes in meadow 30-80 cm, and Orbiliomycetes.

2.3.3.5. Glomeromycetes

x_df_nf <- subset(x_df, sample_type=="meadow")

x_df_nf$compare <- NA

x_df_nf$compare[x_df_nf$depth=="30...40"]<-"yes"
x_df_nf$compare[x_df_nf$depth=="40..."]<-"yes"
x_df_nf$compare[is.na(x_df_nf$compare)]<-"no"
taxa <- "Glomeromycetes"

# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)

#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ compare, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value   Pr(>F)   
## group  1  7.4061 0.009755 **
##       38                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# perform the Kruskal test
k <- kruskal.test(Abundance ~ compare, data = df)
print(k)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by compare
## Kruskal-Wallis chi-squared = 0.93404, df = 1, p-value = 0.3338

Not sig.

Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Geoglossomycetes meadow 10-40 cm (Kruskal; p = 0.000), Microbotryomycetes in conventional 40-80 cm (Kruskal, P = 0.030), and Orbiliomycetes in meadow 10-30 cm.

2.3.3.6. Orbiliomycetes.

x_df_nf <- subset(x_df, sample_type=="meadow")

x_df_nf$compare <- NA

x_df_nf$compare[x_df_nf$depth=="10...20"]<-"yes"
x_df_nf$compare[x_df_nf$depth=="20...30"]<-"yes"
x_df_nf$compare[is.na(x_df_nf$compare)]<-"no"
taxa <- "Orbiliomycetes"

# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)

#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ compare, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value    Pr(>F)    
## group  1  18.614 0.0001099 ***
##       38                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# perform the Kruskal test
k <- kruskal.test(Abundance ~ compare, data = df)
print(k)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by compare
## Kruskal-Wallis chi-squared = 19.128, df = 1, p-value = 1.222e-05

This sentence remains valid:

“Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Geoglossomycetes meadow 10-40 cm (Kruskal; p = 0.000), Microbotryomycetes in conventional 40-80 cm (Kruskal, P = 0.030), and Orbiliomycetes in meadow 10-30 cm (Kruskal; p = 0.000).”


RESULTS STEP 12: AMF PERMANOVA

AMF PERMANOVA will be done at genus level, because the PERMANOVA is used to support the AMF bubble plot in STEP 13, which is done at genus level.

1. Load data

library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library("pairwiseAdonis")


setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')


load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 35 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
ps_GM <- subset_taxa(ps, phylum=="Glomeromycota")
ps_GM
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 263 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 35 sample variables ]
## tax_table()   Taxonomy Table:    [ 263 taxa by 7 taxonomic ranks ]
# 263 taxa and 140 samples

ps_GM <- aggregate_rare(ps_GM, level = "genus", detection = 0, prevalence = 0)
ps_GM
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 17 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 35 sample variables ]
## tax_table()   Taxonomy Table:    [ 17 taxa by 1 taxonomic ranks ]
# 17 taxa and 140 samples

# Pick relative abundances (compositional) and sample metadata 
ps_RA <- microbiome::transform(ps_GM, "compositional")
otu <- abundances(ps_RA)
meta <- meta(ps)

2. Check samples WO AMF

PERMANOVA cannot handle NAs, so I have to remove samples that do not have any AMF taxa from the analysis

# check how many AMF with dim (although I already know it is 17)
dim(otu)
## [1]  17 140
# If column sum adds up to zero, it means that that sample doesn't have any AMF. check how many zero values samples I have, and remove sample if all (17) are zero
x <- colSums(otu==0)==17
z <- which(x, arr.ind = FALSE, useNames = TRUE)
print(z)
##  CG9.1_30to40  CG9.1_40to70 CPO5.1_40to70 CPO5.2_40to70 CR14.1_40to80 
##             4             5            20            25            35 
##     M2_40to60     M3_30to40     M3_40to60  NG2A1_40to70  NG2B3_40to70 
##            55            59            60            65            90 
## OG10.2_40to70 OG10.3_30to40 OG10.3_40to70 OR13.1_30to40 OR13.1_40to80 
##           110           114           115           129           130
Samples_toRemove <- c("CG9.1_30to40", "CG9.1_40to70", "CPO5.1_40to70", "CPO5.2_40to70", "CR14.1_40to80", "M2_40to60", "M3_30to40", "M3_40to60", "NG2A1_40to70", "NG2B3_40to70", "OG10.2_40to70", "OG10.3_30to40", "OG10.3_40to70", "OR13.1_30to40", "OR13.1_40to80")

ps_GM_pruned <- subset_samples(ps_RA, !(sampleID %in% Samples_toRemove))
ps_GM_pruned
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 17 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 35 sample variables ]
## tax_table()   Taxonomy Table:    [ 17 taxa by 1 taxonomic ranks ]
# 17 taxa and 125 samples remained in the dataset

# lets make sure all missing taxa are removed
prev0 = apply(X = otu_table(ps_GM_pruned),
              MARGIN = ifelse(taxa_are_rows(ps_GM_pruned), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_GM_pruned = prune_taxa((prev0 > 0), ps_GM_pruned)
ps_GM_pruned
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 17 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 35 sample variables ]
## tax_table()   Taxonomy Table:    [ 17 taxa by 1 taxonomic ranks ]
# 17 taxa and 125 samples

# Pick relative abundances (compositional) and sample metadata 
ps_RA <- ps_GM_pruned
otu <- abundances(ps_RA)
meta <- meta(ps_RA)

3. Bray distance

# note! the distance matrix is now at genus level!
ps_RA_bray <- phyloseq::distance(ps_RA, method = "bray")

GP.ord <- ordinate(ps_RA, "PCoA", "bray")
p1 = plot_ordination(ps_RA, GP.ord, type="samples", color="sample_type", shape = "depth")
print(p1)

4. PERMANOVA

4.1. Management

# first with just soil type and strata option
a <- adonis2(formula = ps_RA_bray~ sample_type, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$depth)
print(a)
## Permutation test for adonis under reduced model
## Marginal effects of terms
## Blocks:  strata 
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ sample_type, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$depth)
##              Df SumOfSqs      R2     F Pr(>F)    
## sample_type   3    4.029 0.11393 5.186  1e-04 ***
## Residual    121   31.336 0.88607                 
## Total       124   35.366 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.2. Depth

# then with just depth and strata option
a <- adonis2(formula = ps_RA_bray~ depth, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$sample_type)
print(a)
## Permutation test for adonis under reduced model
## Marginal effects of terms
## Blocks:  strata 
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ depth, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$sample_type)
##           Df SumOfSqs      R2      F Pr(>F)    
## depth      4    2.978 0.08421 2.7584  2e-04 ***
## Residual 120   32.388 0.91579                  
## Total    124   35.366 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The AMF communities at genus level differed more between management types (PERMANOVA; R2 = 0.114; p = 0.000) than between soil layers (PERMANOVA; R2 = 0.084; p = 0.000).

4.3. Both (not used)

# For the full model it matters which "by" option we choose. When by="terms" will assess significance for each term sequentially from first to last, so that the order of terms matter. We will use this, because with sequential analysis we will get R2 values that sum up to 1, and will also get the significance and R2 values for each interaction term separately rather than for the interaction alone

# because sample type had larger R2 I will put it first in the model
# note interaction term was not significant!

final <- adonis2(formula = ps_RA_bray ~ sample_type + depth, data = meta, permutations = 9999, method = "bray", by = "terms")
print(final)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ sample_type + depth, data = meta, permutations = 9999, method = "bray", by = "terms")
##              Df SumOfSqs      R2      F Pr(>F)    
## sample_type   3    4.029 0.11393 5.5646  1e-04 ***
## depth         4    3.097 0.08758 3.2082  2e-04 ***
## Residual    117   28.239 0.79849                  
## Total       124   35.366 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

5. Pairwise PERMANOVA

5.1. By management (not used)

set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_bray,factors=meta$sample_type)
pair.mod
##                     pairs Df SumsOfSqs   F.Model         R2 p.value p.adjusted
## 1  conventional vs forest  1 1.1125799  3.748075 0.06973412   0.009      0.054
## 2  conventional vs meadow  1 1.2654046  4.477131 0.05563234   0.003      0.018
## 3 conventional vs organic  1 0.6465733  2.776924 0.03664604   0.022      0.132
## 4        forest vs meadow  1 0.6459536  2.162313 0.04310632   0.075      0.450
## 5       forest vs organic  1 1.6460377  7.515467 0.14310959   0.001      0.006
## 6       meadow vs organic  1 2.5882231 11.141036 0.13563301   0.001      0.006
##   sig
## 1    
## 2   .
## 3    
## 4    
## 5   *
## 6   *
x <- as.data.frame(pair.mod)
print(x)
##                     pairs Df SumsOfSqs   F.Model         R2 p.value p.adjusted
## 1  conventional vs forest  1 1.1125799  3.748075 0.06973412   0.009      0.054
## 2  conventional vs meadow  1 1.2654046  4.477131 0.05563234   0.003      0.018
## 3 conventional vs organic  1 0.6465733  2.776924 0.03664604   0.022      0.132
## 4        forest vs meadow  1 0.6459536  2.162313 0.04310632   0.075      0.450
## 5       forest vs organic  1 1.6460377  7.515467 0.14310959   0.001      0.006
## 6       meadow vs organic  1 2.5882231 11.141036 0.13563301   0.001      0.006
##   sig
## 1    
## 2   .
## 3    
## 4    
## 5   *
## 6   *
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

write.csv2(x, file = "AMF_Pairwise_PERMANOVA_by_MANAGEMENT.csv")

5.2. By depth (not used)

set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_bray,factors=meta$depth)
pair.mod
##                 pairs Df  SumsOfSqs   F.Model          R2 p.value p.adjusted
## 1   0...10 vs 10...20  1 0.08290182 0.3780362 0.006952001   0.844       1.00
## 2   0...10 vs 20...30  1 0.34215045 1.3034979 0.023569900   0.236       1.00
## 3   0...10 vs 30...40  1 1.17951804 4.3871337 0.080664918   0.006       0.06
## 4     0...10 vs 40...  1 1.18589735 4.2227763 0.089422448   0.005       0.05
## 5  10...20 vs 20...30  1 0.30058651 1.2249127 0.022180438   0.279       1.00
## 6  10...20 vs 30...40  1 1.30971371 5.2305082 0.094703242   0.003       0.03
## 7    10...20 vs 40...  1 1.55697160 6.0029207 0.122501284   0.002       0.02
## 8  20...30 vs 30...40  1 0.53352935 1.7961256 0.034676834   0.134       1.00
## 9    20...30 vs 40...  1 0.85776912 2.7351625 0.059804369   0.031       0.31
## 10   30...40 vs 40...  1 0.30239257 0.9246894 0.023160842   0.453       1.00
##    sig
## 1     
## 2     
## 3     
## 4    .
## 5     
## 6    .
## 7    .
## 8     
## 9     
## 10

6. PERMANOVA: Management effect at different depths

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

meta$depth <- as.factor(meta$depth)

for (i in meta$depth) {
  # subset samples
ps_RA_subset <- subset_samples(ps_RA, sample_type != "forest")
ps_RA_subset <- subset_samples(ps_RA_subset, depth == i)

# lets make sure all missing taxa are removed
prev0 = apply(X = otu_table(ps_RA_subset),
              MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
# 12 taxa and 25 samples

meta_subset <- meta(ps_RA_subset)

ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")

set.seed(777)
x <- as.data.frame(pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$sample_type))
x
}

RESULTS STEP 13: AMF bubble plot

1. Load data and add soil_type_depth

library(vegan)
library(goeveg)
library(metagMisc)
library(phyloseq)
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
library(car)

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 35 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
# add sample_type_depth# add soil_type_depth
sample_data(ps)$soil_type_depth <- paste(sample_data(ps)$sample_type, "_", sample_data(ps)$depth)

meta <- meta(ps)

2. Keep only Glomeromycota

Note! first transform to RA and then filter AMF

# Relative abundance
ps_RA <- microbiome::transform(ps, 'compositional')
ps_RA
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 36 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
# keep only Glomeromycota
ps2_std_G <- subset_taxa(ps_RA, phylum=="Glomeromycota")
ps2_std_G
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 263 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 36 sample variables ]
## tax_table()   Taxonomy Table:    [ 263 taxa by 7 taxonomic ranks ]
# 263 taxa and 140 samples

3. Aggregate samples by soil_type_depth

For the bubble plot, we want to get average values based on soil_type_depth

library("metagMisc")

ps2_std_G <- phyloseq_average(
  ps2_std_G,
  avg_type = "arithmetic",
  group = "soil_type_depth",
  drop_group_zero = FALSE,
  verbose = FALSE,
  progress = NULL
)

ps2_std_G
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 263 taxa and 20 samples ]
## tax_table()   Taxonomy Table:    [ 263 taxa by 7 taxonomic ranks ]
# aggregate

ps2_std_G <- aggregate_rare(ps2_std_G, level = 'genus', detection = 0, prevalence = 0, include.lowest = TRUE)
ps2_std_G
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 17 taxa and 20 samples ]
## tax_table()   Taxonomy Table:    [ 17 taxa by 1 taxonomic ranks ]

4. Extract tax and otu tables

tax_table_G <- as.data.frame(as.matrix(tax_table(ps2_std_G)))
OTU_genus_table_G <- as.data.frame(as.matrix(otu_table(ps2_std_G)))

5. Add total AMF to otu table and make wide

# make OTUs into columns
OTU_genus_table_G <- t(OTU_genus_table_G)
OTU_genus_table_G <- as.data.frame(OTU_genus_table_G)

# add total of phylum glomeracea
OTU_genus_table_G <- cbind(
  OTU_genus_table_G,
  total = rowSums(OTU_genus_table_G
))

# Change your data structure from a wide format to a long format. Put any variables that are not OTUs/genus, into the id parameter

#convert data frame from a wide format to a long format
pc <- tibble::rownames_to_column(OTU_genus_table_G, "Sample")
pcm = melt(pc, id = c("Sample"))

6. Combine otu wide and meta

meta_x <- meta[!duplicated(meta$soil_type_depth),]
# Vector of column names to select
columns_to_keep <- c("depth", "sample_type", "soil_type_depth")

# Using base R to select columns
meta_x <- meta_x[, columns_to_keep]

rownames(meta_x) <- NULL

# add metadata
pcm <- left_join(pcm, meta_x, by=c("Sample"="soil_type_depth"))

# change the column name
colnames(pcm)[1] <- "soil_type_depth"

7. Reorder soil_type_depth

pcm$soil_type_depth <- as.factor(pcm$soil_type_depth)

# Change the order of samples
pcm$soil_type_depth <- factor(pcm$soil_type_depth,levels=c("forest _ 0...10", "forest _ 10...20", "forest _ 20...30", "forest _ 30...40", "forest _ 40...", "meadow _ 0...10", "meadow _ 10...20", "meadow _ 20...30", "meadow _ 30...40", "meadow _ 40...", "organic _ 0...10", "organic _ 10...20", "organic _ 20...30", "organic _ 30...40", "organic _ 40...", "conventional _ 0...10", "conventional _ 10...20", "conventional _ 20...30", "conventional _ 30...40", "conventional _ 40..."))

levels(pcm$soil_type_depth)
##  [1] "forest _ 0...10"        "forest _ 10...20"       "forest _ 20...30"      
##  [4] "forest _ 30...40"       "forest _ 40..."         "meadow _ 0...10"       
##  [7] "meadow _ 10...20"       "meadow _ 20...30"       "meadow _ 30...40"      
## [10] "meadow _ 40..."         "organic _ 0...10"       "organic _ 10...20"     
## [13] "organic _ 20...30"      "organic _ 30...40"      "organic _ 40..."       
## [16] "conventional _ 0...10"  "conventional _ 10...20" "conventional _ 20...30"
## [19] "conventional _ 30...40" "conventional _ 40..."

8. Add other taxonomic levels

# add the other taxonomic level annotations
TAX <- as.data.frame(as.matrix(tax_table(ps)))
row.names(TAX) <- NULL
TAX <- TAX[, 1:6]
TAX <- filter(TAX, phylum == "Glomeromycota")


TAX <- TAX[!duplicated(TAX$genus),]
pcm2 <- left_join(pcm, TAX, by = c("variable" = "genus"))

9. Add the taxonomic rank letter to genus

pcm2$variable[pcm2$variable == "Ambisporaceae_unclassified"] <- "Ambisporaceae (f)"
pcm2$variable[pcm2$variable == "Archaeospora"] <- "Archaeospora (g)"
pcm2$variable[pcm2$variable == "Archaeosporaceae_unclassified"] <- "Archaeosporaceae (f)"
pcm2$variable[pcm2$variable == "Archaeosporales_unclassified"] <- "Archaeosporales (o)"
pcm2$variable[pcm2$variable == "Diversispora"] <- "Diversispora (g)"
pcm2$variable[pcm2$variable == "Claroideoglomus"] <- "Claroideoglomus (g)"
pcm2$variable[pcm2$variable == "Entrophospora"] <- "Entrophospora (g)"
pcm2$variable[pcm2$variable == "Claroideoglomeraceae_unclassified"] <- "Claroideoglomeraceae (f)"
pcm2$variable[pcm2$variable == "Dominikia"] <- "Dominikia (g)"
pcm2$variable[pcm2$variable == "Funneliformis"] <- "Funneliformis (g)"
pcm2$variable[pcm2$variable == "Glomeraceae_unclassified"] <- "Glomeraceae (f)"
pcm2$variable[pcm2$variable == "Glomus"] <- "Glomus (g)"
pcm2$variable[pcm2$variable == "Microdominikia"] <- "Microdominikia (g)"
pcm2$variable[pcm2$variable == "Rhizophagus"] <- "Rhizophagus (g)"
pcm2$variable[pcm2$variable == "Glomeromycota_unclassified"] <- "Glomeromycota (p)"
pcm2$variable[pcm2$variable == "Paraglomus"] <- "Paraglomus (g)"
pcm2$variable[pcm2$variable == "Paraglomerales_unclassified"] <- "Paraglomerales (o)"
pcm2$variable[pcm2$variable == "total"] <- "total"
# add total to family
pcm2$family <- pcm2$family %>% replace_na('total')

10. Change family names

pcm2$family[pcm2$family=="Ambisporaceae"] <- "Ambisporaceae (f)"
pcm2$family[pcm2$family=="Archaeosporaceae"] <- "Archaeosporaceae (f)"
pcm2$family[pcm2$family=="Archaeosporales_unclassified"] <- "Archaeosporales (o)"
pcm2$family[pcm2$family=="Diversisporaceae"] <- "Diversisporaceae (f)"
pcm2$family[pcm2$family=="Entrophosporaceae"] <- "Entrophosporaceae (f)"
pcm2$family[pcm2$family=="Claroideoglomeraceae"] <- "Claroideoglomeraceae (f)"
pcm2$family[pcm2$family=="Glomeraceae"] <- "Glomeraceae (f)"
pcm2$family[pcm2$family=="Glomeromycota_unclassified"] <- "Glomeromycota (p)"
pcm2$family[pcm2$family=="Paraglomeraceae"] <- "Paraglomeraceae (f)"
pcm2$family[pcm2$family=="Paraglomerales_unclassified"] <- "Paraglomerales (o)"
# make into factor
pcm2$family <- as.factor(pcm2$family)

levels(pcm2$family)
##  [1] "Ambisporaceae (f)"        "Archaeosporaceae (f)"    
##  [3] "Archaeosporales (o)"      "Claroideoglomeraceae (f)"
##  [5] "Diversisporaceae (f)"     "Entrophosporaceae (f)"   
##  [7] "Glomeraceae (f)"          "Glomeromycota (p)"       
##  [9] "Paraglomeraceae (f)"      "Paraglomerales (o)"      
## [11] "total"

11. Change Family order

# Change level family

pcm2$family <- factor(pcm2$family, levels = c("Ambisporaceae (f)", "Archaeosporaceae (f)", "Diversisporaceae (f)", "Glomeraceae (f)", "Claroideoglomeraceae (f)", "Entrophosporaceae (f)", "Paraglomeraceae (f)", "Glomeromycota (p)", "Archaeosporales (o)", "Paraglomerales (o)", "total"))

12. Change genus order

# make variable into factor
pcm2$variable <- as.factor(pcm2$variable)
# Change genus level order

pcm2$variable <- factor(pcm2$variable, levels = c("Archaeosporales (o)", "Ambisporaceae (f)", "Archaeosporaceae (f)", "Archaeospora (g)", "Diversispora (g)", "Glomeraceae (f)", "Dominikia (g)", "Funneliformis (g)", "Glomus (g)", "Microdominikia (g)", "Rhizophagus (g)", "Claroideoglomeraceae (f)", "Claroideoglomus (g)", "Entrophospora (g)", "Paraglomerales (o)", "Paraglomus (g)", "Glomeromycota (p)", "total"))

levels(pcm2$variable)
##  [1] "Archaeosporales (o)"      "Ambisporaceae (f)"       
##  [3] "Archaeosporaceae (f)"     "Archaeospora (g)"        
##  [5] "Diversispora (g)"         "Glomeraceae (f)"         
##  [7] "Dominikia (g)"            "Funneliformis (g)"       
##  [9] "Glomus (g)"               "Microdominikia (g)"      
## [11] "Rhizophagus (g)"          "Claroideoglomeraceae (f)"
## [13] "Claroideoglomus (g)"      "Entrophospora (g)"       
## [15] "Paraglomerales (o)"       "Paraglomus (g)"          
## [17] "Glomeromycota (p)"        "total"

13. Assign AMF guild based on AMF family

pcm2$AMF_guild <- NA

pcm2$AMF_guild[pcm2$family=="Ambisporaceae (f)"] <- "ancestral"
pcm2$AMF_guild[pcm2$family=="Archaeosporaceae (f)"] <- "ancestral"
pcm2$AMF_guild[pcm2$family=="Archaeosporales (o)"] <- "unknown"
pcm2$AMF_guild[pcm2$family=="Diversisporaceae (f)"] <- "edaphophilic"
pcm2$AMF_guild[pcm2$family=="Claroideoglomeraceae (f)"] <- "rhizophilic"
pcm2$AMF_guild[pcm2$family=="Entrophosporaceae (f)"] <- "rhizophilic"
pcm2$AMF_guild[pcm2$family=="Glomeraceae (f)"] <- "rhizophilic"
pcm2$AMF_guild[pcm2$family=="Glomeromycota (p)"] <- "unknown"
pcm2$AMF_guild[pcm2$family=="Paraglomeraceae (f)"] <- "rhizophilic"
pcm2$AMF_guild[pcm2$family=="Paraglomerales (o)"] <- "unknown"

14. Add new depth

pcm2$new_depth <- NA
pcm2$new_depth[pcm2$depth=="0...10"]<-"0-10"
pcm2$new_depth[pcm2$depth=="10...20"]<-"10-20"
pcm2$new_depth[pcm2$depth=="20...30"]<-"20-30"
pcm2$new_depth[pcm2$depth=="30...40"]<-"30-40"
pcm2$new_depth[pcm2$depth=="40..."]<-"40-80"

15. Modify tax level: Order

Currently, AMF are divided in five orders (Archaeosporales, Diversisporales, Glomerales, Entrophosporales and Paraglomerales)

# modifying the column of data frame
pcm2$order <- as.factor(pcm2$order)

levels(pcm2$order)
## [1] "Archaeosporales"            "Diversisporales"           
## [3] "Entrophosporales"           "Glomerales"                
## [5] "Glomeromycota_unclassified" "Paraglomerales"
# Change levels
pcm2$order <- factor(pcm2$order, levels = c("Archaeosporales", "Diversisporales", "Entrophosporales", "Glomerales", "Paraglomerales", "Glomeromycota_unclassified"), labels = c("Archaeosporales (o)", "Diversisporales (o)", "Entrophosporales (o)", "Glomerales (o)", "Paraglomerales (o)", "Glomeromycota (p)"))

# add order = Glomeromycota to total
pcm2$order <- pcm2$order %>% replace_na('Glomeromycota (p)')

levels(pcm2$order)
## [1] "Archaeosporales (o)"  "Diversisporales (o)"  "Entrophosporales (o)"
## [4] "Glomerales (o)"       "Paraglomerales (o)"   "Glomeromycota (p)"

16. Plot the Bubble plot

For a bubble plot, you are using geom_point and scaling the size to your value (relative abundance) column.

I checked that: - thesmallest non-zero is 8.314653e-07 or 0.0000008314653 - and the biggest value is 1.164825e-01 or 0.1164825

So lets set the limits in the figure accordingly

# color for AMF_guild
MyPalette = c("red",  "blue", "#ff028d", "black")

# where ("Ancestral" = "red", "Rhizophilic" = "#ff028d", "Edaphophilic" =  "blue", "Unknown" = "black")

xx2 = ggplot(pcm2, aes(x = new_depth, y = variable)) + 
  geom_point(aes(size = value, fill = order, color = AMF_guild), shape = 21, alpha = 1, stroke = 0) + 
  scale_fill_manual(values=c("#99CC99", "#83adb5", "#692D6B", "#D094D2", "#cba69e", "#A9A9A9")) + 
  scale_size_continuous(limits = c(0.0000001, 0.2), range = c(3,30), breaks = c(0.0000001, 0.0001, 0.001, 0.01, 0.2)) + 
  labs( x= "depth (cm)", y = "", size = "Relative abundance", fill = "", color = "AMF guild")  + 
  theme(legend.key=element_blank(), 
        axis.text.x = element_text(colour = "black", size = 14, angle = 45, vjust = 1, hjust = 1),
        axis.title.x=element_text(colour = "black", size = 14,face="bold"), 
        axis.text.y = element_text(colour = "black", size = 14), 
        legend.text = element_text(size = 14, colour ="black"), 
        legend.title = element_text(size = 16, face = "bold"), 
        panel.background = element_blank(), panel.border = element_rect(colour = "black", fill = NA, size = 1.2)) + facet_wrap(vars(sample_type), nrow = 1, ncol = 4) +
  scale_y_discrete(limits = rev(levels(pcm2$variable))) + guides(fill = guide_legend(order = 1, ncol = 2, override.aes = list(size = 8, shape = 21)), color = guide_legend(order = 2, ncol = 2, override.aes = list(shape = 15, size = 8, stroke = 2)))
#+ theme(legend.margin=margin(1,1,1,1), legend.box.spacing = unit(1.6, "pt"))
#+ theme(legend.title.align=0.5)#+ theme(legend.position="bottom")
f1 <- xx2 + scale_color_manual(values = (MyPalette)) + theme(strip.text = element_text(size = 16, color = "black"))
#+  guides(color = guide_legend(nrow = 2, override.aes = list(shape = 15, size = 2, stroke = 2)))   #+ coord_flip() + guide_legend(ncol=2)

### change y axis label colors based on AMF guild

f2 <- f1 + theme(axis.text.y = element_text(color = c("black", "black", "#ff028d", "black", "#ff028d", "#ff028d", "#ff028d", "#ff028d", "#ff028d", "#ff028d", "#ff028d", "#ff028d", "#ff028d", "blue", "red", "red", "red", "black")))

print(f2)

Save with 1400 width and 550 height

17. Statistical testing

I have previously tested the AMF genera, family, order and guild, but no other difference was found other than:

More Ambisporaceae in forest compared to other Wilcoxon).

So I will not include the tests here, They were done similarly as before e.g. for AMF relative abundance.


RESULTS STEP 14: Spearman correlations with soil properties and fungal and AMF richness as well as with AMF relative abundance

Simple spearman rank correlation with richness and env. variables are done WITHOUT forest because forest soil is so different environment compared to meadow, organic and conventional soils

library("ggpubr")
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')

load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 35 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]

1. remove forest

ps_nf <- subset_samples(ps, sample_type!= "forest")

meta <- meta(ps_nf)
names(meta)
##  [1] "sampleID"             "plot"                 "sampling_position"   
##  [4] "actual_sample_depth"  "depth"                "depth_numerical"     
##  [7] "vegetation"           "sample_type"          "root_mgg"            
## [10] "pH_H2O"               "EC_uScm"              "C_g_per_kg"          
## [13] "N_gkg"                "TP_gkg"               "Alox_mmolkg"         
## [16] "Feox_mmolkg"          "oxides_mmolkg"        "PH2O_mgkg"           
## [19] "Porg_mgkg"            "DOC_mgkg"             "Pinorg_mgkg"         
## [22] "C_per_N"              "observed"             "chao1"               
## [25] "shannon"              "observed_sng"         "chao1_sng"           
## [28] "shannon_sng"          "log_root"             "reads"               
## [31] "OTUs"                 "saprotroph_richness"  "symbiotroph_richness"
## [34] "pathotroph_richness"  "AMF_richness"

2. Spearman for richnesses

Soil properties to test against:

C_per_N pH_H2O C_g_per_kg Feox_mmolkg DOC_mgkg N_gkg depth_numerical Porg_mgkg log_root TP_gkg Pinorg_mgkg Alox_mmolkg PH2O_mgkg

2.1. Observed richness

env <- c("C_per_N", "pH_H2O", "C_g_per_kg", "Feox_mmolkg", "DOC_mgkg", "N_gkg", "depth_numerical", "Porg_mgkg", "log_root", "TP_gkg", "Pinorg_mgkg", "Alox_mmolkg", "PH2O_mgkg")

for (i in env) {
   # Filter out rows with NA values in the columns of interest
  valid_data <- meta[!is.na(meta$observed) & !is.na(meta[[i]]), ]
  
  # Perform Spearman correlation test
  x <- cor.test(valid_data$observed, valid_data[[i]], method = "spearman")
  
  # Print the result
  print(paste("Correlation test for:", i))
  print(x)
  }
## [1] "Correlation test for: C_per_N"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$observed and valid_data[[i]]
## S = 122420, p-value = 7.695e-15
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.6239005 
## 
## [1] "Correlation test for: pH_H2O"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$observed and valid_data[[i]]
## S = 543813, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.6707008 
## 
## [1] "Correlation test for: C_g_per_kg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$observed and valid_data[[i]]
## S = 98808, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.6964415 
## 
## [1] "Correlation test for: Feox_mmolkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$observed and valid_data[[i]]
## S = 145648, p-value = 2.38e-11
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.5525416 
## 
## [1] "Correlation test for: DOC_mgkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$observed and valid_data[[i]]
## S = 107044, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.6711408 
## 
## [1] "Correlation test for: N_gkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$observed and valid_data[[i]]
## S = 103654, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.6815556 
## 
## [1] "Correlation test for: depth_numerical"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$observed and valid_data[[i]]
## S = 568043, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.7451402 
## 
## [1] "Correlation test for: Porg_mgkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$observed and valid_data[[i]]
## S = 108419, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.6587925 
## 
## [1] "Correlation test for: log_root"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$observed and valid_data[[i]]
## S = 147129, p-value = 3.731e-11
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.5479921 
## 
## [1] "Correlation test for: TP_gkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$observed and valid_data[[i]]
## S = 134591, p-value = 6.626e-13
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.5865102 
## 
## [1] "Correlation test for: Pinorg_mgkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$observed and valid_data[[i]]
## S = 361273, p-value = 0.2224
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.1099019 
## 
## [1] "Correlation test for: Alox_mmolkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$observed and valid_data[[i]]
## S = 130503, p-value = 1.582e-13
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##     rho 
## 0.59907 
## 
## [1] "Correlation test for: PH2O_mgkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$observed and valid_data[[i]]
## S = 258057, p-value = 0.03667
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.1878602

2.2. AMF_richness

env <- c("C_per_N", "pH_H2O", "C_g_per_kg", "Feox_mmolkg", "DOC_mgkg", "N_gkg", "depth_numerical", "Porg_mgkg", "log_root", "TP_gkg", "Pinorg_mgkg", "Alox_mmolkg", "PH2O_mgkg")

for (i in env) {
   # Filter out rows with NA values in the columns of interest
  valid_data <- meta[!is.na(meta$AMF_richness) & !is.na(meta[[i]]), ]
  
  # Perform Spearman correlation test
  x <- cor.test(valid_data$AMF_richness, valid_data[[i]], method = "spearman")
  
  # Print the result
  print(paste("Correlation test for:", i))
  print(x)
  }
## [1] "Correlation test for: C_per_N"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$AMF_richness and valid_data[[i]]
## S = 142617, p-value = 9.284e-12
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.5618529 
## 
## [1] "Correlation test for: pH_H2O"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$AMF_richness and valid_data[[i]]
## S = 512461, p-value = 2.496e-12
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.5743796 
## 
## [1] "Correlation test for: C_g_per_kg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$AMF_richness and valid_data[[i]]
## S = 159586, p-value = 1.27e-09
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.5097203 
## 
## [1] "Correlation test for: Feox_mmolkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$AMF_richness and valid_data[[i]]
## S = 124010, p-value = 1.425e-14
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.6190164 
## 
## [1] "Correlation test for: DOC_mgkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$AMF_richness and valid_data[[i]]
## S = 157639, p-value = 7.53e-10
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.5157014 
## 
## [1] "Correlation test for: N_gkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$AMF_richness and valid_data[[i]]
## S = 176552, p-value = 8.089e-08
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.4575968 
## 
## [1] "Correlation test for: depth_numerical"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$AMF_richness and valid_data[[i]]
## S = 453384, p-value = 5.841e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.3928852 
## 
## [1] "Correlation test for: Porg_mgkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$AMF_richness and valid_data[[i]]
## S = 204283, p-value = 4.676e-05
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.3570961 
## 
## [1] "Correlation test for: log_root"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$AMF_richness and valid_data[[i]]
## S = 189803, p-value = 1.324e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.4168873 
## 
## [1] "Correlation test for: TP_gkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$AMF_richness and valid_data[[i]]
## S = 252601, p-value = 0.01205
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.2239608 
## 
## [1] "Correlation test for: Pinorg_mgkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$AMF_richness and valid_data[[i]]
## S = 384409, p-value = 0.0434
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.1809802 
## 
## [1] "Correlation test for: Alox_mmolkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$AMF_richness and valid_data[[i]]
## S = 133398, p-value = 4.39e-13
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.5901751 
## 
## [1] "Correlation test for: PH2O_mgkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$AMF_richness and valid_data[[i]]
## S = 349401, p-value = 0.271
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##         rho 
## -0.09961067

3. Spearman for AMF RA

ps_nf_RA <- transform(ps_nf, "compositional")
ps_nf_RA
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 35 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
gm <- aggregate_rare(ps_nf_RA, level = "phylum", detection = 0, prevalence = 0 )
gm
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 14 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 35 sample variables ]
## tax_table()   Taxonomy Table:    [ 14 taxa by 1 taxonomic ranks ]
df <- psmelt(gm)
df <- subset(df, OTU == "Glomeromycota")

env <- c("C_per_N", "pH_H2O", "C_g_per_kg", "Feox_mmolkg", "DOC_mgkg", "N_gkg", "depth_numerical", "Porg_mgkg", "log_root", "TP_gkg", "Pinorg_mgkg", "Alox_mmolkg", "PH2O_mgkg")

for (i in env) {
  # Perform Spearman correlation test
  x <- cor.test(df$Abundance, df[[i]], method = "spearman", na.rm = TRUE)
  
  # Print the result
  print(paste("Correlation test for:", i))
  print(x)
  }
## [1] "Correlation test for: C_per_N"
## 
##  Spearman's rank correlation rho
## 
## data:  df$Abundance and df[[i]]
## S = 338926, p-value = 0.6479
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##         rho 
## -0.04124707 
## 
## [1] "Correlation test for: pH_H2O"
## 
##  Spearman's rank correlation rho
## 
## data:  df$Abundance and df[[i]]
## S = 305311, p-value = 0.492
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## 0.06202587 
## 
## [1] "Correlation test for: C_g_per_kg"
## 
##  Spearman's rank correlation rho
## 
## data:  df$Abundance and df[[i]]
## S = 369419, p-value = 0.1336
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.1349274 
## 
## [1] "Correlation test for: Feox_mmolkg"
## 
##  Spearman's rank correlation rho
## 
## data:  df$Abundance and df[[i]]
## S = 320234, p-value = 0.8579
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## 0.01617915 
## 
## [1] "Correlation test for: DOC_mgkg"
## 
##  Spearman's rank correlation rho
## 
## data:  df$Abundance and df[[i]]
## S = 377055, p-value = 0.0777
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## -0.158386 
## 
## [1] "Correlation test for: N_gkg"
## 
##  Spearman's rank correlation rho
## 
## data:  df$Abundance and df[[i]]
## S = 384219, p-value = 0.0441
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.1803965 
## 
## [1] "Correlation test for: depth_numerical"
## 
##  Spearman's rank correlation rho
## 
## data:  df$Abundance and df[[i]]
## S = 257422, p-value = 0.01924
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.2091478 
## 
## [1] "Correlation test for: Porg_mgkg"
## 
##  Spearman's rank correlation rho
## 
## data:  df$Abundance and df[[i]]
## S = 394534, p-value = 0.006855
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.2416477 
## 
## [1] "Correlation test for: log_root"
## 
##  Spearman's rank correlation rho
## 
## data:  df$Abundance and df[[i]]
## S = 368781, p-value = 0.1393
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.1329676 
## 
## [1] "Correlation test for: TP_gkg"
## 
##  Spearman's rank correlation rho
## 
## data:  df$Abundance and df[[i]]
## S = 430932, p-value = 0.0002288
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## -0.323907 
## 
## [1] "Correlation test for: Pinorg_mgkg"
## 
##  Spearman's rank correlation rho
## 
## data:  df$Abundance and df[[i]]
## S = 348926, p-value = 0.4251
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##         rho 
## -0.07197018 
## 
## [1] "Correlation test for: Alox_mmolkg"
## 
##  Spearman's rank correlation rho
## 
## data:  df$Abundance and df[[i]]
## S = 313002, p-value = 0.6707
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## 0.03839783 
## 
## [1] "Correlation test for: PH2O_mgkg"
## 
##  Spearman's rank correlation rho
## 
## data:  df$Abundance and df[[i]]
## S = 443246, p-value = 5.642e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.3949508